#include <algorithm>

#include "ParallelDomainBase.h"
#include "ParMetisPartitioner.h"
#include "Mesh.h"
#include "Traverse.h"
#include "ElInfo.h"
#include "Element.h"
#include "MacroElement.h"
#include "PartitionElementData.h"
#include "DOFMatrix.h"
#include "DOFVector.h"
#include "SystemVector.h"
#include "VtkWriter.h"
#include "ElementDofIterator.h"
#include "ProblemStatBase.h"
#include "StandardProblemIteration.h"
#include "ElementFileWriter.h"
#include "VertexVector.h"
#include "StdMpi.h"

#include "petscksp.h"

namespace AMDiS {

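  // Monitor callback for the petsc KSP solver: on rank 0, it prints the
  // residual norm of every 100th iteration.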
  PetscErrorCode myKSPMonitor(KSP ksp, PetscInt iter, PetscReal rnorm, void *)
  {    
    if (iter % 100 == 0 && MPI::COMM_WORLD.Get_rank() == 0)
      std::cout << "  Petsc-Iteration " << iter << ": " << rnorm << std::endl;

    return 0;
  }

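  // Compares two dof pointers by the dof indices they point to.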
  inline bool cmpDofsByValue(const DegreeOfFreedom* dof1, const DegreeOfFreedom* dof2)
  {
    return (*dof1 < *dof2);
  }

  ParallelDomainBase::ParallelDomainBase(ProblemIterationInterface *iIF,
					 ProblemTimeInterface *tIF,
					 FiniteElemSpace *fe,
					 RefinementManager *refineManager)
    : iterationIF(iIF),
      timeIF(tIF),
      name(iIF->getName()),
      feSpace(fe),
      mesh(fe->getMesh()),
      refinementManager(refineManager),
      initialPartitionMesh(true),
      nRankDofs(0),
      rstart(0),
      nComponents(1),
      deserialized(false)
  {
    FUNCNAME("ParallelDomainBase::ParalleDomainBase()");

    TEST_EXIT(mesh->getNumberOfDOFAdmin() == 1)
      ("Only meshes with one DOFAdmin are supported!\n");
    TEST_EXIT(mesh->getDOFAdmin(0).getNumberOfPreDOFs(0) == 0)
      ("Wrong pre dof number for DOFAdmin!\n");

    mpiRank = MPI::COMM_WORLD.Get_rank();
    mpiSize = MPI::COMM_WORLD.Get_size();
    mpiComm = MPI::COMM_WORLD;
    partitioner = new ParMetisPartitioner(mesh, &mpiComm);
  }

  void ParallelDomainBase::initParallelization(AdaptInfo *adaptInfo)
  {
    FUNCNAME("ParallelDomainBase::initParallelization()");

    TEST_EXIT(mpiSize > 1)
      ("Parallelization does not work with only one process!\n");

    // If the problem has already been read from a file, we do not need to do anything.
    if (deserialized)
      return;

    // Test whether the mesh consists of macro elements only. Partitioning of the
    // mesh is supported only for macro meshes, so it will not work if the mesh
    // has already been refined in some way.
    testForMacroMesh();

    // create an initial partitioning of the mesh
    partitioner->createPartitionData();
    // set the element weights, which are all 1 at the very beginning
    setElemWeights(adaptInfo);
    // and now partition the mesh
    partitionMesh(adaptInfo);   

#if (DEBUG != 0)
    ElementIdxToDofs elMap;
    DbgCreateElementMap(elMap);

    if (mpiRank == 0)
      writePartitioningMesh("part.vtu");
#endif

    // === Create new global and local DOF numbering. ===

    // Set of all DOFs of the rank.
    std::vector<const DegreeOfFreedom*> rankDofs;
    // Number of DOFs in the rank's partition that are owned by the rank.
    nRankDofs = 0;
    // Number of all DOFs in the macro mesh.
    int nOverallDOFs = 0;

    createLocalGlobalNumbering(rankDofs, nRankDofs, nOverallDOFs);

    // === Create interior boundary information ===

    createInteriorBoundaryInfo();

    // === Remove all macro elements that are not part of the rank partition. ===

    removeMacroElements();

#if (DEBUG != 0)
    DbgTestElementMap(elMap);
    DbgTestInteriorBoundary();
#endif

    // === Reset all DOFAdmins of the mesh. ===

    updateDofAdmins();

    // === Create periodic dof mapping, if there are periodic boundaries. ===

    createPeriodicMap();


    // === Global refinements. ===

    int globalRefinement = 0;
    GET_PARAMETER(0, mesh->getName() + "->global refinements", "%d", &globalRefinement);

    if (globalRefinement > 0) {
      refinementManager->globalRefine(mesh, globalRefinement);

#if (DEBUG != 0)
      elMap.clear();
      DbgCreateElementMap(elMap);
#endif

      updateLocalGlobalNumbering(nRankDofs, nOverallDOFs);

      updateDofAdmins();

#if (DEBUG != 0)
      DbgTestElementMap(elMap);
#endif

      // === Update periodic mapping, if there are periodic boundaries. ===
      createPeriodicMap();
    }

    //  exit(0);

#if (DEBUG != 0)
    DbgTestCommonDofs(true);
#endif

    nRankRows = nRankDofs * nComponents;
    nOverallRows = nOverallDOFs * nComponents;
  }


  void ParallelDomainBase::exitParallelization(AdaptInfo *adaptInfo)
  {}

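  // Resets all DOFAdmins of the mesh to the new local dof numbering: all dofs
  // with a local index smaller than mapLocalGlobalDOFs.size() are marked as
  // used, all remaining dofs as free.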
  void ParallelDomainBase::updateDofAdmins()
  {
    int nAdmins = mesh->getNumberOfDOFAdmin();
    for (int i = 0; i < nAdmins; i++) {
      DOFAdmin& admin = const_cast<DOFAdmin&>(mesh->getDOFAdmin(i));
      
      for (int j = 0; j < admin.getSize(); j++)
	admin.setDOFFree(j, true);
      for (int j = 0; j < static_cast<int>(mapLocalGlobalDOFs.size()); j++)
 	admin.setDOFFree(j, false);

      admin.setUsedSize(mapLocalGlobalDOFs.size());
      admin.setUsedCount(mapLocalGlobalDOFs.size());
      admin.setFirstHole(mapLocalGlobalDOFs.size());
    }
  }


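  // Checks that the mesh has not been refined yet and that it contains at
  // least as many macro elements as there are MPI processes.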
  void ParallelDomainBase::testForMacroMesh()
  {
    FUNCNAME("ParallelDomainBase::testForMacroMesh()");

    int nMacroElements = 0;

    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, -1, Mesh::CALL_LEAF_EL);
    while (elInfo) {
      TEST_EXIT(elInfo->getLevel() == 0)
	("Mesh is already refined! This does not work with parallelization!\n");
      
      nMacroElements++;

      elInfo = stack.traverseNext(elInfo);
    }

    TEST_EXIT(nMacroElements >= mpiSize)
      ("The mesh has less macro elements than number of mpi processes!\n");
  }


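  // Inserts a dof matrix into the petsc matrix. The petsc index of a dof is
  // computed as globalDofIndex * dispMult + dispAddRow/dispAddCol, so the
  // components of a system can be interleaved within one global matrix.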
  void ParallelDomainBase::setDofMatrix(DOFMatrix* mat, int dispMult, 
					int dispAddRow, int dispAddCol)
  {
    FUNCNAME("ParallelDomainBase::setDofMatrix()");

    TEST_EXIT(mat)("No DOFMatrix!\n");

    using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits= mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    traits::col<Matrix>::type col(mat->getBaseMatrix());
    traits::const_value<Matrix>::type value(mat->getBaseMatrix());

    typedef traits::range_generator<row, Matrix>::type cursor_type;
    typedef traits::range_generator<nz, cursor_type>::type icursor_type;

    std::vector<int> cols;
    std::vector<double> values;
    cols.reserve(300);
    values.reserve(300);

    // === Traverse all rows of the dof matrix and insert the values row-wise ===
    // === into the petsc matrix.                                             ===

    for (cursor_type cursor = begin<row>(mat->getBaseMatrix()), 
	   cend = end<row>(mat->getBaseMatrix()); cursor != cend; ++cursor) {

      cols.clear();
      values.clear();

      // Global index of the current row dof.
      int globalRowDof = mapLocalGlobalDOFs[*cursor];
      // Test if the current row dof is a periodic dof.
      bool periodicRow = (periodicDof.count(globalRowDof) > 0);


      // === Traverse all non-zero entries of the row and create the vector  ===
      // === cols with the column indices of all row entries and the vector  ===
      // === values with the corresponding values.                           ===

      for (icursor_type icursor = begin<nz>(cursor), icend = end<nz>(cursor); 
	   icursor != icend; ++icursor) {

	// Insert only non-zero values.
	if (value(*icursor) != 0.0) {
	  // Global index of the current column dof.
	  int globalColDof = mapLocalGlobalDOFs[col(*icursor)];
	  // Calculate the exact position of the column index in the petsc matrix.
	  int colIndex = globalColDof * dispMult + dispAddCol;

	  // If the current row is not periodic, but the current column dof is periodic,
	  // we have to copy the value to all corresponding periodic columns.
 	  if (periodicRow == false && periodicDof.count(globalColDof) > 0) {
	    // The value is assigned to n matrix entries; therefore, every entry
	    // gets only 1/n of the original value.
	    double scalFactor = 1.0 / (periodicDof[globalColDof].size() + 1.0);
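	    // Example: with one periodic partner, n = 2 and each of the two
	    // inserted entries gets half of the original value.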

	    // Insert original entry.
 	    cols.push_back(colIndex);
 	    values.push_back(value(*icursor) * scalFactor);

	    // Insert the periodic entries.
 	    for (std::set<DegreeOfFreedom>::iterator it = 
		   periodicDof[globalColDof].begin();
 		 it != periodicDof[globalColDof].end(); ++it) {
 	      cols.push_back(*it * dispMult + dispAddCol);
 	      values.push_back(value(*icursor) * scalFactor);
	    }
 	  } else {
	    // Neither row nor column dof index is periodic; simply add the entry.
	    cols.push_back(colIndex);
	    values.push_back(value(*icursor));
	  }
	}
      }


      // === Up to now we have assembled one row. Now, the row must be sent to ===
      // === the corresponding rows of the petsc matrix.                       ===

      // Calculate petsc row index.
      int rowIndex = globalRowDof * dispMult + dispAddRow;
      
      if (periodicRow) {
	// The row dof is periodic, so send the row to all corresponding periodic rows.

	double scalFactor = 1.0 / (periodicDof[globalRowDof].size() + 1.0);
	
	int diagIndex = -1;
	for (int i = 0; i < static_cast<int>(values.size()); i++) {
	  // Scale only the non-diagonal values of the row. For the diagonal test
	  // we compare the global dof indices of the dof matrix (not of the petsc
	  // matrix!).
	  if ((cols[i] - dispAddCol) / dispMult != globalRowDof)
	    values[i] *= scalFactor;
	  else
	    diagIndex = i;
	}
	
	// Send the main row to the petsc matrix.
	MatSetValues(petscMatrix, 1, &rowIndex, cols.size(), 
		     &(cols[0]), &(values[0]), ADD_VALUES);	
 
	// Set the diagonal element to zero, i.e., the diagonal element of the
	// current row is not sent to the periodic row indices.
	if (diagIndex != -1)
	  values[diagIndex] = 0.0;

	// Send the row to all periodic row indices.
	for (std::set<DegreeOfFreedom>::iterator it = periodicDof[globalRowDof].begin();
	     it != periodicDof[globalRowDof].end(); ++it) {
	  int perRowIndex = *it * dispMult + dispAddRow;
	  MatSetValues(petscMatrix, 1, &perRowIndex, cols.size(), 
		       &(cols[0]), &(values[0]), ADD_VALUES);
	}

      } else {
	// The row dof is not periodic, simply send the row to the petsc matrix.
	MatSetValues(petscMatrix, 1, &rowIndex, cols.size(), 
		     &(cols[0]), &(values[0]), ADD_VALUES);
      }    
    }
  }


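  // Adds the values of a dof vector to the petsc vector. The petsc index of a
  // dof is computed as globalDofIndex * dispMult + dispAdd; the value of a
  // periodic dof is distributed equally over all its periodic partners.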
  void ParallelDomainBase::setDofVector(Vec& petscVec, DOFVector<double>* vec, 
					int dispMult, int dispAdd)
  {
    // Traverse all used dofs in the dof vector.
    DOFVector<double>::Iterator dofIt(vec, USED_DOFS);
    for (dofIt.reset(); !dofIt.end(); ++dofIt) {
      // Calculate global row index of the dof.
      int globalRow = mapLocalGlobalDOFs[dofIt.getDOFIndex()];
      // Calculate petsc index of the row dof.
      int index = globalRow * dispMult + dispAdd;

      if (periodicDof.count(globalRow) > 0) {
	// The dof index is periodic, so distribute the value equally over all corresponding dof entries.

	double value = *dofIt / (periodicDof[globalRow].size() + 1.0);
	VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);

	for (std::set<DegreeOfFreedom>::iterator it = periodicDof[globalRow].begin();
	     it != periodicDof[globalRow].end(); ++it) {
	  index = *it * dispMult + dispAdd;
	  VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);
	}

      } else {
	// The dof index is not periodic.
	double value = *dofIt;
	VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);
      }
    }    
  }


  void ParallelDomainBase::fillPetscMatrix(DOFMatrix *mat, DOFVector<double> *vec)
  {
    FUNCNAME("ParallelDomainBase::fillPetscMatrix()");

    ERROR_EXIT("Not yet tested for scalar problem definition!\n");

    MatCreate(PETSC_COMM_WORLD, &petscMatrix);
    MatSetSizes(petscMatrix, nRankRows, nRankRows, nOverallRows, nOverallRows);
    MatSetType(petscMatrix, MATAIJ);

    VecCreate(PETSC_COMM_WORLD, &petscRhsVec);
    VecSetSizes(petscRhsVec, nRankRows, nOverallRows);
    VecSetType(petscRhsVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscSolVec);
    VecSetSizes(petscSolVec, nRankRows, nOverallRows);
    VecSetType(petscSolVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscTmpVec);
    VecSetSizes(petscTmpVec, nRankRows, nOverallRows);
    VecSetType(petscTmpVec, VECMPI);

    setDofMatrix(mat);

    MatAssemblyBegin(petscMatrix, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(petscMatrix, MAT_FINAL_ASSEMBLY);

    setDofVector(petscRhsVec, vec);

    VecAssemblyBegin(petscRhsVec);
    VecAssemblyEnd(petscRhsVec);
  }

  
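  // Assembles the coupled system into one petsc matrix: creates the petsc
  // vectors, counts the non-zeros per row (exchanging the counts of rows that
  // are owned by other ranks), preallocates the matrix, and finally inserts
  // all component matrices and the rhs vectors.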
  void ParallelDomainBase::fillPetscMatrix(Matrix<DOFMatrix*> *mat, SystemVector *vec)
  {
    FUNCNAME("ParallelDomainBase::fillPetscMatrix()");

    clock_t first = clock();

    VecCreate(PETSC_COMM_WORLD, &petscRhsVec);
    VecSetSizes(petscRhsVec, nRankRows, nOverallRows);
    VecSetType(petscRhsVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscSolVec);
    VecSetSizes(petscSolVec, nRankRows, nOverallRows);
    VecSetType(petscSolVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscTmpVec);
    VecSetSizes(petscTmpVec, nRankRows, nOverallRows);
    VecSetType(petscTmpVec, VECMPI);

    using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits= mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

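    // d_nnz[i] counts the non-zeros of local row i in the diagonal block
    // (columns owned by this rank), o_nnz[i] those in the off-diagonal block
    // (columns owned by other ranks). Both arrays are used to preallocate the
    // petsc matrix.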
    int d_nnz[nRankRows];
    int o_nnz[nRankRows];

    std::map<int, std::vector< std::pair<int, int> > > sendMatrixEntry;

    for (int i = 0; i < nRankRows; i++) {
      d_nnz[i] = 0;
      o_nnz[i] = 0;
    }

    for (int i = 0; i < nComponents; i++) {
      for (int j = 0; j < nComponents; j++) {
 	if ((*mat)[i][j]) {
	  Matrix bmat = (*mat)[i][j]->getBaseMatrix();

	  traits::col<Matrix>::type col(bmat);
	  traits::const_value<Matrix>::type value(bmat);
	  
	  typedef traits::range_generator<row, Matrix>::type cursor_type;
	  typedef traits::range_generator<nz, cursor_type>::type icursor_type;
	  
	  for (cursor_type cursor = begin<row>(bmat), 
		 cend = end<row>(bmat); cursor != cend; ++cursor) {

	    int r = mapLocalGlobalDOFs[*cursor] * nComponents + i;

	    if (isRankDof[*cursor]) {
	      r -= rstart * nComponents;

#if (DEBUG != 0)    
	      if (r < 0 || r >= nRankRows) {
		std::cout << "ERROR in rank: " << mpiRank << std::endl;
		std::cout << "  Wrong r = " << r << " " << *cursor << " " 
			  << mapLocalGlobalDOFs[*cursor] << " " 
			  << nRankRows << std::endl;
		ERROR_EXIT("Should not happen!\n");
	      }
#endif
	      
	      for (icursor_type icursor = begin<nz>(cursor), 
		     icend = end<nz>(cursor); icursor != icend; ++icursor) {
		if (value(*icursor) != 0.0) {
		  int c = mapLocalGlobalDOFs[col(*icursor)] * nComponents + j;

		  if (c >= rstart * nComponents && 
		      c < rstart * nComponents + nRankRows)
		    d_nnz[r]++;
		  else
		    o_nnz[r]++;		  
		}    
	      }
	    } else {
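	      // The row corresponds to a dof that is owned by another rank. Find
	      // the owner and collect the entries of the row, so that they can be
	      // sent to the owner and counted there.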
	      int sendToRank = -1;

	      for (RankToDofContainer::iterator it = recvDofs.begin();
		   it != recvDofs.end(); ++it) {
		for (DofContainer::iterator dofIt = it->second.begin();
		     dofIt != it->second.end(); ++dofIt) {
		  if (**dofIt == *cursor) {
		    sendToRank = it->first;
		    break;
		  }
		}

		if (sendToRank != -1)
		  break;
	      }

	      TEST_EXIT_DBG(sendToRank != -1)("Should not happen!\n");

	      for (icursor_type icursor = begin<nz>(cursor), 
		     icend = end<nz>(cursor); icursor != icend; ++icursor) {
		if (value(*icursor) != 0.0) {
		  int c = mapLocalGlobalDOFs[col(*icursor)] * nComponents + j;
		  
		  sendMatrixEntry[sendToRank].push_back(std::make_pair(r, c));
		}
	      }
	    }
	  }
	}
      }
    }

    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;

    std::vector<int*> sendBuffers;
    sendBuffers.reserve(recvDofs.size());

    for (RankToDofContainer::iterator it = recvDofs.begin(); 
	 it != recvDofs.end(); ++it) {
      int nSend = sendMatrixEntry[it->first].size();

      request[requestCounter++] = mpiComm.Isend(&nSend, 1, MPI_INT, it->first, 0);
      
      if (nSend > 0) {
	int *sendbuf = new int[nSend * 2];
	for (int i = 0; i < nSend; i++) {
	  sendbuf[i * 2] = sendMatrixEntry[it->first][i].first;
	  sendbuf[i * 2 + 1] = sendMatrixEntry[it->first][i].second;
	}
	sendBuffers.push_back(sendbuf);
      } else {
	sendBuffers.push_back(NULL);
      }
    }

    std::vector<int> recvSize;
    recvSize.reserve(sendDofs.size());
    
    int i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it)
      request[requestCounter++] = 
	mpiComm.Irecv(&(recvSize[i++]), 1, MPI_INT, it->first, 0);

    MPI::Request::Waitall(requestCounter, request);

    requestCounter = 0;

    i = 0;
    for (RankToDofContainer::iterator it = recvDofs.begin(); 
	 it != recvDofs.end(); ++it) {
      int nSend = sendMatrixEntry[it->first].size();

      if (nSend > 0)
	request[requestCounter++] = 
	  mpiComm.Isend(sendBuffers[i], nSend * 2, MPI_INT, it->first, 0);

      i++;
    }

    std::vector<int*> recvBuffers;
    recvBuffers.reserve(sendDofs.size());
    
    i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it) {
      if (recvSize[i] > 0) {
	int *recvbuf = new int[recvSize[i] * 2];
	recvBuffers.push_back(recvbuf);

	request[requestCounter++] =
	  mpiComm.Irecv(recvbuf, recvSize[i] * 2, MPI_INT, it->first, 0);
      } else {
	recvBuffers.push_back(NULL);
      }

      i++;
    }

    MPI::Request::Waitall(requestCounter, request);

    for (int j = 0; j < static_cast<int>(sendBuffers.size()); j++)
      if (sendBuffers[j])
 	delete [] sendBuffers[j];

    i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it) {
      if (recvSize[i] > 0) {
	for (int j = 0; j < recvSize[i]; j++) {
	  int r = recvBuffers[i][j * 2];
	  int c = recvBuffers[i][j * 2 + 1];

	  r -= rstart * nComponents;

	  TEST_EXIT_DBG(r >= 0 && r < nRankRows)("Should not happen!\n");
	  
	  if (c < rstart * nComponents || c >= rstart * nComponents + nRankRows)
	    o_nnz[r]++;
	  else
	    d_nnz[r]++;
	}

	delete [] recvBuffers[i];
      }

      i++;
    }

    MatCreateMPIAIJ(PETSC_COMM_WORLD, nRankRows, nRankRows, nOverallRows, nOverallRows,
		    0, d_nnz, 0, o_nnz, &petscMatrix);

#if (DEBUG != 0)
    int a, b;
    MatGetOwnershipRange(petscMatrix, &a, &b);
    TEST_EXIT(a == rstart * nComponents)("Wrong matrix ownership range!\n");
    TEST_EXIT(b == rstart * nComponents + nRankRows)("Wrong matrix ownership range!\n");
#endif

    using mtl::tag::major; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits= mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    for (int i = 0; i < nComponents; i++)
      for (int j = 0; j < nComponents; j++)
	if ((*mat)[i][j])
	  setDofMatrix((*mat)[i][j], nComponents, i, j);

    MatAssemblyBegin(petscMatrix, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(petscMatrix, MAT_FINAL_ASSEMBLY);

    for (int i = 0; i < nComponents; i++)
      setDofVector(petscRhsVec, vec->getDOFVector(i), nComponents, i);

    VecAssemblyBegin(petscRhsVec);
    VecAssemblyEnd(petscRhsVec);

    INFO(info, 8)("Fill petsc matrix needed %.5f seconds\n", TIME_USED(first, clock()));
  }


  void ParallelDomainBase::solvePetscMatrix(DOFVector<double> *vec)
  {
    FUNCNAME("ParallelDomainBase::solvePetscMatrix()");

    ERROR_EXIT("Not yet tested for scalar problem definition!\n");

    KSP ksp;
    KSPCreate(PETSC_COMM_WORLD, &ksp);
    KSPSetOperators(ksp, petscMatrix, petscMatrix, SAME_NONZERO_PATTERN);
    KSPSetTolerances(ksp, 1.e-7, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT);
    KSPSetType(ksp, KSPBCGS);
    KSPMonitorSet(ksp, myKSPMonitor, PETSC_NULL, 0);
    KSPSolve(ksp, petscRhsVec, petscSolVec);

    PetscScalar *vecPointer;
    VecGetArray(petscSolVec, &vecPointer);

    for (int i = 0; i < nRankDofs; i++)
      (*vec)[mapLocalToDofIndex[i]] = vecPointer[i];

    VecRestoreArray(petscSolVec, &vecPointer);

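    // Exchange the solution values at the interior boundary dofs: each rank
    // sends the values of the boundary dofs it owns to all other ranks that
    // share these dofs.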
    std::vector<double*> sendBuffers(sendDofs.size());
    std::vector<double*> recvBuffers(recvDofs.size());

    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;
    
    int i = 0;
    for (RankToDofContainer::iterator sendIt = sendDofs.begin();
	 sendIt != sendDofs.end(); ++sendIt, i++) {
      int nSendDOFs = sendIt->second.size();
      sendBuffers[i] = new double[nSendDOFs];

      for (int j = 0; j < nSendDOFs; j++)
	sendBuffers[i][j] = (*vec)[*((sendIt->second)[j])];

      request[requestCounter++] =
	mpiComm.Isend(sendBuffers[i], nSendDOFs, MPI_DOUBLE, sendIt->first, 0);
    }

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size();
      recvBuffers[i] = new double[nRecvDOFs];

      request[requestCounter++] =
	mpiComm.Irecv(recvBuffers[i], nRecvDOFs, MPI_DOUBLE, recvIt->first, 0);
    }


    MPI::Request::Waitall(requestCounter, request);

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      for (int j = 0; j < static_cast<int>(recvIt->second.size()); j++)
	(*vec)[*(recvIt->second)[j]] = recvBuffers[i][j];

      delete [] recvBuffers[i];
    }
    
    for (int i = 0; i < static_cast<int>(sendBuffers.size()); i++)
      delete [] sendBuffers[i];

    MatDestroy(petscMatrix);
    VecDestroy(petscRhsVec);
    VecDestroy(petscSolVec);
    VecDestroy(petscTmpVec);
  }


  void ParallelDomainBase::solvePetscMatrix(SystemVector &vec)
  {
    FUNCNAME("ParallelDomainBase::solvePetscMatrix()");

#if 0
    // Set the old solution to be the initial guess for the petsc solver.
    for (int i = 0; i < nComponents; i++)
      setDofVector(petscSolVec, vec->getDOFVector(i), nComponents, i);

    VecAssemblyBegin(petscSolVec);
    VecAssemblyEnd(petscSolVec);
#endif

    // === Init Petsc solver. ===

    KSP solver;
    KSPCreate(PETSC_COMM_WORLD, &solver);
    KSPSetOperators(solver, petscMatrix, petscMatrix, SAME_NONZERO_PATTERN); 
    KSPSetTolerances(solver, 0.0, 1e-8, PETSC_DEFAULT, PETSC_DEFAULT);
    KSPSetType(solver, KSPBCGS);
    KSPMonitorSet(solver, myKSPMonitor, PETSC_NULL, 0);
    KSPSetFromOptions(solver);
    // Do not delete the solution vector, use it for the initial guess.
    //    KSPSetInitialGuessNonzero(solver, PETSC_TRUE);


    // === Run Petsc. ===

    KSPSolve(solver, petscRhsVec, petscSolVec);

    // === Transfer the values from petsc's solution vector to the dof vectors. ===
    PetscScalar *vecPointer;
    VecGetArray(petscSolVec, &vecPointer);

    for (int i = 0; i < nComponents; i++) {
      DOFVector<double> *dofvec = vec.getDOFVector(i);
      for (int j = 0; j < nRankDofs; j++)
	(*dofvec)[mapLocalToDofIndex[j]] = vecPointer[j * nComponents + i];      
    }

    VecRestoreArray(petscSolVec, &vecPointer);


    // === Synchronize the values at common dofs, i.e., dofs that belong to  ===
    // === more than one partition.                                          ===
    synchVectors(vec);


    // === Print information about the solution process. ===

    int iterations = 0;
    KSPGetIterationNumber(solver, &iterations);
    MSG("  Number of iterations: %d\n", iterations);
    
    double norm = 0.0;
    MatMult(petscMatrix, petscSolVec, petscTmpVec);
    VecAXPY(petscTmpVec, -1.0, petscRhsVec);
    VecNorm(petscTmpVec, NORM_2, &norm);
    MSG("  Residual norm: %e\n", norm);


    // === Destroy Petsc's variables. ===

    MatDestroy(petscMatrix);
    VecDestroy(petscRhsVec);
    VecDestroy(petscSolVec);
    VecDestroy(petscTmpVec);
    KSPDestroy(solver);
  }
  
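  // Synchronizes the values of all dof vectors in vec at the interior boundary
  // dofs: the owning ranks send their values, the receiving ranks overwrite
  // their local copies with the received values.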
  void ParallelDomainBase::synchVectors(SystemVector &vec)
  {
#if 0
    StdMpi<std::vector<double>, std::vector<double> > stdMpi(mpiComm);
    stdMpi.prepareCommunication(false);

    for (RankToDofContainer::iterator sendIt = sendDofs.begin();
	 sendIt != sendDofs.end(); ++sendIt, i++) {
      std::vector<double> dofs;
      int nSendDOFs = sendIt->second.size();
      dofs.reserve(nComponents * sendIt->second.size());
      
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
	for (int k = 0; k < nSendDOFs; k++)
	  dofs.push_back((*dofvec)[*((sendIt->second)[k])]);
      }

      stdMpi.send(sendIt->first, dofs);
    }

    stdMpi.startCommunication();
#endif

#if 1

    std::vector<double*> sendBuffers(sendDofs.size());
    std::vector<double*> recvBuffers(recvDofs.size());

    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;
    
    int i = 0;
    for (RankToDofContainer::iterator sendIt = sendDofs.begin();
	 sendIt != sendDofs.end(); ++sendIt, i++) {
      int nSendDOFs = sendIt->second.size();
      sendBuffers[i] = new double[nSendDOFs * nComponents];

      int counter = 0;
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
	for (int k = 0; k < nSendDOFs; k++)
	  sendBuffers[i][counter++] = (*dofvec)[*((sendIt->second)[k])];
      }

      request[requestCounter++] = mpiComm.Isend(sendBuffers[i], nSendDOFs * nComponents,
						MPI_DOUBLE, sendIt->first, 0);
    }

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size() * nComponents;
      recvBuffers[i] = new double[nRecvDOFs];

      request[requestCounter++] =
	mpiComm.Irecv(recvBuffers[i], nRecvDOFs, MPI_DOUBLE, recvIt->first, 0);
    }


    MPI::Request::Waitall(requestCounter, request);

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size();

      int counter = 0;
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
 	for (int k = 0; k < nRecvDOFs; k++)
	  (*dofvec)[*(recvIt->second)[k]] = recvBuffers[i][counter++];
      }

      delete [] recvBuffers[i];
    }
    
    for (int i = 0; i < static_cast<int>(sendBuffers.size()); i++)
      delete [] sendBuffers[i];
#endif
  }

  
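  // Serialization helpers: a dof container is written as its size followed by
  // the dof indices; on deserialization, the indices are mapped back to dof
  // pointers via dofMap.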
  void ParallelDomainBase::serialize(std::ostream &out, DofContainer &data)
  {
    int vecSize = data.size();
    SerUtil::serialize(out, vecSize);
    for (int i = 0; i < vecSize; i++) {
      int dofIndex = (*data[i]);
      SerUtil::serialize(out, dofIndex);
    }
  }


  void ParallelDomainBase::deserialize(std::istream &in, DofContainer &data,
				       std::map<int, const DegreeOfFreedom*> &dofMap)
  {
    FUNCNAME("ParallelDomainBase::deserialize()");

    int vecSize = 0;
    SerUtil::deserialize(in, vecSize);
    data.resize(vecSize);
    for (int i = 0; i < vecSize; i++) {
      int dofIndex = 0;
      SerUtil::deserialize(in, dofIndex);

      TEST_EXIT_DBG(dofMap.count(dofIndex) != 0)
	("Dof index could not be deserialized correctly!\n");

      data[i] = dofMap[dofIndex];
    }
  }


  void ParallelDomainBase::serialize(std::ostream &out, RankToDofContainer &data)
  {
    int mapSize = data.size();
    SerUtil::serialize(out, mapSize);
    for (RankToDofContainer::iterator it = data.begin(); it != data.end(); ++it) {
      int rank = it->first;
      SerUtil::serialize(out, rank);
      serialize(out, it->second);
    }
  }

  
  void ParallelDomainBase::deserialize(std::istream &in, RankToDofContainer &data,
				       std::map<int, const DegreeOfFreedom*> &dofMap)
  {
    int mapSize = 0;
    SerUtil::deserialize(in, mapSize);
    for (int i = 0; i < mapSize; i++) {
      int rank = 0;
      SerUtil::deserialize(in, rank);
      deserialize(in, data[rank], dofMap);      
    }
  }


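  // Computes the element weights for mesh partitioning: every leaf element
  // within the rank's partition adds a weight of 1 to its macro element.
  // Returns the sum of all local weights.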
  double ParallelDomainBase::setElemWeights(AdaptInfo *adaptInfo) 
  {
    double localWeightSum = 0.0;
    int elNum = -1;

    elemWeights.clear();

    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, -1,
					 Mesh::CALL_EVERY_EL_PREORDER);
    while (elInfo) {
      Element *element = elInfo->getElement();

      // get partition data
      PartitionElementData *partitionData = dynamic_cast<PartitionElementData*>
	(element->getElementData(PARTITION_ED));

      if (partitionData && partitionData->getPartitionStatus() == IN) {
	if (partitionData->getLevel() == 0)
	  elNum = element->getIndex();
	
	TEST_EXIT_DBG(elNum != -1)("invalid element number\n");
	if (element->isLeaf()) {
	  elemWeights[elNum] += 1.0;
	  localWeightSum += 1.0;
	}
      }

      elInfo = stack.traverseNext(elInfo);
    }

    return localWeightSum;
  }


  void ParallelDomainBase::partitionMesh(AdaptInfo *adaptInfo)
  {
    if (initialPartitionMesh) {
      initialPartitionMesh = false;
      partitioner->fillCoarsePartitionVec(&oldPartitionVec);
      partitioner->partition(&elemWeights, INITIAL);
    } else {
      oldPartitionVec = partitionVec;
      partitioner->partition(&elemWeights, ADAPTIVE_REPART, 100.0 /*0.000001*/);
    }    

    partitioner->fillCoarsePartitionVec(&partitionVec);
  }


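  // Partitions the mesh: the first call creates an initial partitioning, all
  // subsequent calls perform an adaptive repartitioning. The resulting mapping
  // from element indices to ranks is stored in partitionVec.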
  void ParallelDomainBase::createInteriorBoundaryInfo()
  {
    FUNCNAME("ParallelDomainBase::createInteriorBoundaryInfo()");

    // To be general, we have to use elInfo->getBoundary(EDGE/FACE, i) instead of
    // elInfo->getBoundary(i).
    TEST_EXIT(mesh->getDim() == 2)
      ("getBoundary(i) always returns the i-th edge, generalize for 3d!\n");


    // === First, traverse the mesh and search for all elements that have a  ===
    // === boundary with an element within another partition.                ===

    TraverseStack stack;
    ElInfo *elInfo = 
      stack.traverseFirst(mesh, -1, 
			  Mesh::CALL_LEAF_EL | Mesh::FILL_NEIGH | Mesh::FILL_BOUND);
    while (elInfo) {
      Element *element = elInfo->getElement();

      PartitionElementData *partitionData = 
	dynamic_cast<PartitionElementData*>(element->getElementData(PARTITION_ED));   

      // Check whether the element is within the rank's partition.
      if (partitionData->getPartitionStatus() == IN) {
	for (int i = 0; i < 3; i++) {
	  if (!elInfo->getNeighbour(i))
	    continue;

	  PartitionElementData *neighbourPartitionData =
	    dynamic_cast<PartitionElementData*>(elInfo->getNeighbour(i)->getElementData(PARTITION_ED));

 	  if (neighbourPartitionData->getPartitionStatus() == OUT) {

	    // We have found an element that is in the rank's partition, but has a
	    // neighbour outside of the rank's partition.

	    // === Create information about the boundary between the two elements. ===

	    AtomicBoundary bound;	    	    
	    bound.rankObj.el = element;
	    bound.rankObj.elIndex = element->getIndex();
	    bound.rankObj.subObj = EDGE;
	    bound.rankObj.ithObj = i;
	    // Do not set a pointer to the element, because it will be deleted from
	    // the mesh after partitioning and the pointer would become invalid.
	    bound.neighObj.el = NULL;
	    bound.neighObj.elIndex = elInfo->getNeighbour(i)->getIndex();
	    bound.neighObj.subObj = EDGE;
	    bound.neighObj.ithObj = elInfo->getSideOfNeighbour(i);
	    
	    // i == 2  =>  getSideOfNeighbour(i) == 2
	    TEST_EXIT_DBG(i != 2 || elInfo->getSideOfNeighbour(i) == 2)
	      ("Should not happen!\n");

	    // Get rank number of the neighbouring element.
	    int otherElementRank = partitionVec[elInfo->getNeighbour(i)->getIndex()];


	    // === Add the boundary information object to the corresponding overall ===
	    // === boundary. There are three possibilities: if the boundary is a    ===
	    // === periodic boundary, just add it to \ref periodicBoundary. Here it ===
	    // === does not matter which rank is responsible for this boundary.     ===
	    // === Otherwise, the boundary is added either to \ref myIntBoundary or ===
	    // === to \ref otherIntBoundary, depending on which rank is responsible ===
	    // === for this boundary.                                               ===

	    if (BoundaryManager::isBoundaryPeriodic(elInfo->getBoundary(i))) {	      
	      // We have found an element that is at an interior, periodic boundary.
	      AtomicBoundary& b = periodicBoundary.getNewAtomic(otherElementRank);
	      b = bound;
	    } else {
	      // We have found an element that is at an interior, non-periodic boundary.
	      AtomicBoundary& b = ((mpiRank > otherElementRank) ?
				   myIntBoundary.getNewAtomic(otherElementRank) :
				   otherIntBoundary.getNewAtomic(otherElementRank));
	      b = bound;	      
	    }
 	  }
	}
      }

      elInfo = stack.traverseNext(elInfo);
    }


    // === Once we have this information, we must take care of the order of the  ===
    // === atomic boundaries in the three boundary handling objects. In the end, ===
    // === all boundaries must be in the same order on both ranks that share a   ===
    // === boundary.                                                             ===

    std::vector<int*> sendBuffers, recvBuffers;

    // First we communicate myInt/otherIntBoundary, and then the periodic boundaries.
    MPI::Request request[max(myIntBoundary.boundary.size() + 
			     otherIntBoundary.boundary.size(),
			     periodicBoundary.boundary.size())];
    int requestCounter = 0;


    // === The owners of the boundaries send their atomic boundary order to  ===
    // === the neighbouring ranks.                                           ===

    for (RankToBoundMap::iterator rankIt = myIntBoundary.boundary.begin();
	 rankIt != myIntBoundary.boundary.end(); ++rankIt) {    
      int nSendInt = rankIt->second.size();
      int* buffer = new int[nSendInt * 2];
      for (int i = 0; i < nSendInt; i++) {
	buffer[i * 2] = (rankIt->second)[i].rankObj.elIndex;
	buffer[i * 2 + 1] = (rankIt->second)[i].rankObj.ithObj;
      }
      sendBuffers.push_back(buffer);
      
      request[requestCounter++] =
	mpiComm.Isend(buffer, nSendInt * 2, MPI_INT, rankIt->first, 0);
    }

    for (RankToBoundMap::iterator rankIt = otherIntBoundary.boundary.begin();
	 rankIt != otherIntBoundary.boundary.end(); ++rankIt) {
      int nRecvInt = rankIt->second.size();
      int *