#include <algorithm>

#include "ParallelDomainBase.h"
#include "ParMetisPartitioner.h"
#include "Mesh.h"
#include "Traverse.h"
#include "ElInfo.h"
#include "Element.h"
#include "MacroElement.h"
#include "PartitionElementData.h"
#include "DOFMatrix.h"
#include "DOFVector.h"
#include "SystemVector.h"
#include "VtkWriter.h"
#include "ElementDofIterator.h"
#include "ProblemStatBase.h"
#include "StandardProblemIteration.h"
#include "ElementFileWriter.h"
#include "VertexVector.h"

#include "petscksp.h"

namespace AMDiS {

  PetscErrorCode myKSPMonitor(KSP ksp, PetscInt iter, PetscReal rnorm, void *)
  {    
    if (iter % 100 == 0 && MPI::COMM_WORLD.Get_rank() == 0)
      std::cout << "  Petsc-Iteration " << iter << ": " << rnorm << std::endl;

    return 0;
  }

  inline bool cmpDofsByValue(const DegreeOfFreedom* dof1, const DegreeOfFreedom* dof2)
  {
    return (*dof1 < *dof2);
  }

  ParallelDomainBase::ParallelDomainBase(ProblemIterationInterface *iIF,
					 ProblemTimeInterface *tIF,
					 FiniteElemSpace *fe,
					 RefinementManager *refineManager)
    : iterationIF(iIF),
      timeIF(tIF),
      name(iIF->getName()),
      feSpace(fe),
      mesh(fe->getMesh()),
      refinementManager(refineManager),
      initialPartitionMesh(true),
      nRankDofs(0),
      rstart(0),
      nComponents(1),
      deserialized(false)
  {
    FUNCNAME("ParallelDomainBase::ParallelDomainBase()");

    TEST_EXIT(mesh->getNumberOfDOFAdmin() == 1)
      ("Only meshes with one DOFAdmin are supported!\n");
    TEST_EXIT(mesh->getDOFAdmin(0).getNumberOfPreDOFs(0) == 0)
      ("Wrong pre dof number for DOFAdmin!\n");

    mpiRank = MPI::COMM_WORLD.Get_rank();
    mpiSize = MPI::COMM_WORLD.Get_size();
    mpiComm = MPI::COMM_WORLD;
    partitioner = new ParMetisPartitioner(mesh, &mpiComm);
  }

  void ParallelDomainBase::initParallelization(AdaptInfo *adaptInfo)
  {
    FUNCNAME("ParallelDomainBase::initParallelization()");

    TEST_EXIT(mpiSize > 1)
      ("Parallelization does not work with only one process!\n");

    // If the problem has already been read from a file, we do not need to do anything.
    if (deserialized)
      return;

    // Test if the mesh is the macro mesh only! Partitioning of the mesh is supported
    // only for macro meshes, so it will not work yet if the mesh is already refined
    // in some way.
    testForMacroMesh();

    // Create an initial partitioning of the mesh,
    partitioner->createPartitionData();
    // set the element weights, which are all 1 at the very beginning,
    setElemWeights(adaptInfo);
    // and now partition the mesh.
    partitionMesh(adaptInfo);   

#if (DEBUG != 0)
    ElementIdxToDofs elMap;
    DbgCreateElementMap(elMap);

    if (mpiRank == 0)
      writePartitioningMesh("part.vtu");
#endif

    // === Create new global and local DOF numbering. ===

    // Set of all DOFs of the rank.
    std::vector<const DegreeOfFreedom*> rankDofs;
    // Number of DOFs in the rank's partition that are owned by the rank.
    nRankDofs = 0;
    // Number of all DOFs in the macro mesh.
    int nOverallDOFs = 0;

    createLocalGlobalNumbering(rankDofs, nRankDofs, nOverallDOFs);

    // === Create interior boundary information ===

    createInteriorBoundaryInfo();

    // === Remove all macro elements that are not part of the rank partition. ===

    removeMacroElements();

#if (DEBUG != 0)
    DbgTestElementMap(elMap);
    DbgTestInteriorBoundary();
#endif

    // === Reset all DOFAdmins of the mesh. ===

    updateDofAdmins();

    // === Create periodic dof mapping, if there are periodic boundaries. ===

    createPeriodicMap();

    // === Global refinements. ===

    int globalRefinement = 0;
    GET_PARAMETER(0, "testgr", "%d", &globalRefinement);

    if (globalRefinement > 0) {
      refinementManager->globalRefine(mesh, globalRefinement);

#if (DEBUG != 0)
      elMap.clear();
      DbgCreateElementMap(elMap);
#endif

      updateLocalGlobalNumbering(nRankDofs, nOverallDOFs);

      updateDofAdmins();

#if (DEBUG != 0)
      DbgTestElementMap(elMap);
#endif

      // === Update periodic mapping, if there are periodic boundaries. ===
      createPeriodicMap();
    }

#if (DEBUG != 0)
    DbgTestCommonDofs(true);
#endif

    nRankRows = nRankDofs * nComponents;
    nOverallRows = nOverallDOFs * nComponents;
  }
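
  // Note (illustrative sketch, not part of the original AMDiS sources): the methods of
  // this class are meant to be used roughly in the following order. All method names
  // below exist in this file; only the calling context (the names "domain", "adaptInfo",
  // "mat", "vec" and "solution") is hypothetical.
  //
  //   ParallelDomainBase domain(iterationIF, timeIF, feSpace, refineManager);
  //   domain.initParallelization(adaptInfo);  // partition the macro mesh, create the
  //                                           // DOF numbering, interior boundaries and
  //                                           // the periodic mapping
  //   domain.fillPetscMatrix(mat, vec);       // assemble the PETSc matrix and rhs
  //   domain.solvePetscMatrix(*solution);     // solve and write the result back into
  //                                           // the DOF vectors of all ranks
  //   domain.exitParallelization(adaptInfo);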


  void ParallelDomainBase::exitParallelization(AdaptInfo *adaptInfo)
  {}

  
  void ParallelDomainBase::updateDofAdmins()
  {
    int nAdmins = mesh->getNumberOfDOFAdmin();
    for (int i = 0; i < nAdmins; i++) {
      DOFAdmin& admin = const_cast<DOFAdmin&>(mesh->getDOFAdmin(i));
      
      for (int j = 0; j < admin.getSize(); j++)
	admin.setDOFFree(j, true);
      for (int j = 0; j < static_cast<int>(mapLocalGlobalDOFs.size()); j++)
 	admin.setDOFFree(j, false);

      admin.setUsedSize(mapLocalGlobalDOFs.size());
      admin.setUsedCount(mapLocalGlobalDOFs.size());
      admin.setFirstHole(mapLocalGlobalDOFs.size());
    }
  }

  void ParallelDomainBase::testForMacroMesh()
  {
    FUNCNAME("ParallelDomainBase::testForMacroMesh()");

    int nMacroElements = 0;

    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, -1, Mesh::CALL_LEAF_EL);
    while (elInfo) {
      TEST_EXIT(elInfo->getLevel() == 0)
	("Mesh is already refined! This does not work with parallelization!\n");
      
      nMacroElements++;

      elInfo = stack.traverseNext(elInfo);
    }

    TEST_EXIT(nMacroElements >= mpiSize)
      ("The mesh has less macro elements than number of mpi processes!\n");
  }

209

210
211
  void ParallelDomainBase::setDofMatrix(DOFMatrix* mat, int dispMult, 
					int dispAddRow, int dispAddCol)
  {
    FUNCNAME("ParallelDomainBase::setDofMatrix()");

    TEST_EXIT(mat)("No DOFMatrix!\n");

    using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits= mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    traits::col<Matrix>::type col(mat->getBaseMatrix());
    traits::const_value<Matrix>::type value(mat->getBaseMatrix());

    typedef traits::range_generator<row, Matrix>::type cursor_type;
    typedef traits::range_generator<nz, cursor_type>::type icursor_type;

    std::vector<int> cols;
    std::vector<double> values;
    cols.reserve(300);
    values.reserve(300);

    // === Traverse all rows of the dof matrix and insert the values row-wise ===
    // === into the petsc matrix.                                             ===

    for (cursor_type cursor = begin<row>(mat->getBaseMatrix()), 
	   cend = end<row>(mat->getBaseMatrix()); cursor != cend; ++cursor) {

      cols.clear();
      values.clear();

      // Global index of the current row dof.
      int globalRowDof = mapLocalGlobalDOFs[*cursor];
      // Test if the current row dof is a periodic dof.
      bool periodicRow = (periodicDof.count(globalRowDof) > 0);

      // === Traverse all non-zero entries of the row and produce a vector cols ===
      // === with the column indices of all row entries and a vector values     ===
      // === with the corresponding values.                                      ===

      for (icursor_type icursor = begin<nz>(cursor), icend = end<nz>(cursor); 
	   icursor != icend; ++icursor) {

	// Insert only non-zero values.
	if (value(*icursor) != 0.0) {
	  // Global index of the current column dof.
	  int globalColDof = mapLocalGlobalDOFs[col(*icursor)];
	  // Calculate the exact position of the column index in the petsc matrix.
	  int colIndex = globalColDof * dispMult + dispAddCol;

	  // If the current row is not periodic, but the current dof index is periodic,
	  // we have to duplicate the value to the other corresponding periodic columns.
	  if (periodicRow == false && periodicDof.count(globalColDof) > 0) {
	    // The value is assigned to n matrix entries, therefore every entry
	    // gets only 1/n of the original value.
	    double scalFactor = 1.0 / (periodicDof[globalColDof].size() + 1.0);

	    // Insert the original entry.
	    cols.push_back(colIndex);
	    values.push_back(value(*icursor) * scalFactor);

	    // Insert the periodic entries.
	    for (std::set<DegreeOfFreedom>::iterator it = 
		   periodicDof[globalColDof].begin();
		 it != periodicDof[globalColDof].end(); ++it) {
	      cols.push_back(*it * dispMult + dispAddCol);
	      values.push_back(value(*icursor) * scalFactor);
	    }
 	  } else {
	    // Neither row nor column dof index is periodic; simply add the entry.
	    cols.push_back(colIndex);
	    values.push_back(value(*icursor));
	  }
	}
      }

      // === Up to now we have assembled one row. Now, the row must be sent to the ===
      // === corresponding rows of the petsc matrix.                               ===

      // Calculate petsc row index.
      int rowIndex = globalRowDof * dispMult + dispAddRow;
      
      if (periodicRow) {
	// The row dof is periodic, so send the row to all corresponding periodic rows.

	double scalFactor = 1.0 / (periodicDof[globalRowDof].size() + 1.0);
	
	int diagIndex = -1;
	for (int i = 0; i < static_cast<int>(values.size()); i++) {
	  // Change only the non-diagonal values in the column. For the diagonal test
	  // we compare the global dof indices of the dof matrix (not of the petsc
	  // matrix!).
	  if ((cols[i] - dispAddCol) / dispMult != globalRowDof)
	    values[i] *= scalFactor;
	  else
	    diagIndex = i;
	}
	
	// Send the main row to the petsc matrix.
	MatSetValues(petscMatrix, 1, &rowIndex, cols.size(), 
		     &(cols[0]), &(values[0]), ADD_VALUES);	

	// Set the diagonal element to zero, i.e., the diagonal element of the current
	// row is not sent to the periodic row indices.
	if (diagIndex != -1)
	  values[diagIndex] = 0.0;

	// Send the row to all periodic row indices.
	for (std::set<DegreeOfFreedom>::iterator it = periodicDof[globalRowDof].begin();
	     it != periodicDof[globalRowDof].end(); ++it) {
	  int perRowIndex = *it * dispMult + dispAddRow;
	  MatSetValues(petscMatrix, 1, &perRowIndex, cols.size(), 
		       &(cols[0]), &(values[0]), ADD_VALUES);
	}

      } else {
	// The row dof is not periodic, simply send the row to the petsc matrix.
	MatSetValues(petscMatrix, 1, &rowIndex, cols.size(), 
		     &(cols[0]), &(values[0]), ADD_VALUES);
      }    
    }
  }
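
  // Worked example (added for illustration, not part of the original code): for a
  // system with nComponents = 2, setDofMatrix() is called with dispMult = nComponents
  // and dispAddRow/dispAddCol set to the component indices, so the DOF with global
  // index g = 10 maps to the petsc rows/columns 10 * 2 + 0 = 20 and 10 * 2 + 1 = 21.
  // If a DOF is identified with n other DOFs by periodic boundary conditions, each of
  // the n + 1 identified rows/columns receives the entry scaled by 1 / (n + 1), so
  // that the sum over all copies reproduces the original matrix entry.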
  void ParallelDomainBase::setDofVector(Vec& petscVec, DOFVector<double>* vec, 
					int dispMult, int dispAdd)
  {
    // Traverse all used dofs in the dof vector.
    DOFVector<double>::Iterator dofIt(vec, USED_DOFS);
    for (dofIt.reset(); !dofIt.end(); ++dofIt) {
      // Calculate global row index of the dof.
      int globalRow = mapLocalGlobalDOFs[dofIt.getDOFIndex()];
      // Calculate petsc index of the row dof.
      int index = globalRow * dispMult + dispAdd;

      if (periodicDof.count(globalRow) > 0) {
	// The dof index is periodic, so divide the value among all corresponding dof entries.

	double value = *dofIt / (periodicDof[globalRow].size() + 1.0);
	VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);

	for (std::set<DegreeOfFreedom>::iterator it = periodicDof[globalRow].begin();
	     it != periodicDof[globalRow].end(); ++it) {
	  index = *it * dispMult + dispAdd;
	  VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);
	}

      } else {
	// The dof index is not periodic.
	double value = *dofIt;
	VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);
      }
    }    
  }


  void ParallelDomainBase::fillPetscMatrix(DOFMatrix *mat, DOFVector<double> *vec)
  {
    FUNCNAME("ParallelDomainBase::fillPetscMatrix()");

    ERROR_EXIT("Not yet tested for scalar problem definition!\n");

    MatCreate(PETSC_COMM_WORLD, &petscMatrix);
    MatSetSizes(petscMatrix, nRankRows, nRankRows, nOverallRows, nOverallRows);
    MatSetType(petscMatrix, MATAIJ);

    VecCreate(PETSC_COMM_WORLD, &petscRhsVec);
    VecSetSizes(petscRhsVec, nRankRows, nOverallRows);
    VecSetType(petscRhsVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscSolVec);
    VecSetSizes(petscSolVec, nRankRows, nOverallRows);
    VecSetType(petscSolVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscTmpVec);
    VecSetSizes(petscTmpVec, nRankRows, nOverallRows);
    VecSetType(petscTmpVec, VECMPI);

    setDofMatrix(mat);

    MatAssemblyBegin(petscMatrix, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(petscMatrix, MAT_FINAL_ASSEMBLY);

    setDofVector(petscRhsVec, vec);

    VecAssemblyBegin(petscRhsVec);
    VecAssemblyEnd(petscRhsVec);
  }

  
  void ParallelDomainBase::fillPetscMatrix(Matrix<DOFMatrix*> *mat, SystemVector *vec)
  {
    FUNCNAME("ParallelDomainBase::fillPetscMatrix()");

    clock_t first = clock();

    VecCreate(PETSC_COMM_WORLD, &petscRhsVec);
    VecSetSizes(petscRhsVec, nRankRows, nOverallRows);
    VecSetType(petscRhsVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscSolVec);
    VecSetSizes(petscSolVec, nRankRows, nOverallRows);
    VecSetType(petscSolVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscTmpVec);
    VecSetSizes(petscTmpVec, nRankRows, nOverallRows);
    VecSetType(petscTmpVec, VECMPI);

    using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits= mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    int d_nnz[nRankRows];
    int o_nnz[nRankRows];

    std::map<int, std::vector< std::pair<int, int> > > sendMatrixEntry;

    for (int i = 0; i < nRankRows; i++) {
      d_nnz[i] = 0;
      o_nnz[i] = 0;
    }

    for (int i = 0; i < nComponents; i++) {
      for (int j = 0; j < nComponents; j++) {
 	if ((*mat)[i][j]) {
	  Matrix bmat = (*mat)[i][j]->getBaseMatrix();

	  traits::col<Matrix>::type col(bmat);
	  traits::const_value<Matrix>::type value(bmat);
	  
	  typedef traits::range_generator<row, Matrix>::type cursor_type;
	  typedef traits::range_generator<nz, cursor_type>::type icursor_type;
	  
	  for (cursor_type cursor = begin<row>(bmat), 
		 cend = end<row>(bmat); cursor != cend; ++cursor) {

	    int r = mapLocalGlobalDOFs[*cursor] * nComponents + i;

	    if (isRankDof[*cursor]) {
	      r -= rstart * nComponents;

#if (DEBUG != 0)    
	      if (r < 0 || r >= nRankRows) {
		std::cout << "ERROR in rank: " << mpiRank << std::endl;
		std::cout << "  Wrong r = " << r << " " << *cursor << " " 
			  << mapLocalGlobalDOFs[*cursor] << " " 
			  << nRankRows << std::endl;
		ERROR_EXIT("Should not happen!\n");
	      }
#endif
	      
	      for (icursor_type icursor = begin<nz>(cursor), 
		     icend = end<nz>(cursor); icursor != icend; ++icursor) {
		if (value(*icursor) != 0.0) {
		  int c = mapLocalGlobalDOFs[col(*icursor)] * nComponents + j;

		  if (c >= rstart * nComponents && 
		      c < rstart * nComponents + nRankRows)
		    d_nnz[r]++;
		  else
		    o_nnz[r]++;		  
		}    
	      }
	    } else {
	      int sendToRank = -1;

	      for (RankToDofContainer::iterator it = recvDofs.begin();
		   it != recvDofs.end(); ++it) {
		for (DofContainer::iterator dofIt = it->second.begin();
		     dofIt != it->second.end(); ++dofIt) {
		  if (**dofIt == *cursor) {
		    sendToRank = it->first;
		    break;
		  }
		}

		if (sendToRank != -1)
		  break;
	      }

	      TEST_EXIT_DBG(sendToRank != -1)("Should not happen!\n");

	      for (icursor_type icursor = begin<nz>(cursor), 
		     icend = end<nz>(cursor); icursor != icend; ++icursor) {
		if (value(*icursor) != 0.0) {
		  int c = mapLocalGlobalDOFs[col(*icursor)] * nComponents + j;
		  
		  sendMatrixEntry[sendToRank].push_back(std::make_pair(r, c));
		}
	      }
	    }
	  }
	}
      }
    }

    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;

    std::vector<int*> sendBuffers;
    sendBuffers.reserve(recvDofs.size());

    for (RankToDofContainer::iterator it = recvDofs.begin(); 
	 it != recvDofs.end(); ++it) {
      int nSend = sendMatrixEntry[it->first].size();
      request[requestCounter++] = mpiComm.Isend(&nSend, 1, MPI_INT, it->first, 0);
      
      if (nSend > 0) {
	int *sendbuf = new int[nSend * 2];
	for (int i = 0; i < nSend; i++) {
	  sendbuf[i * 2] = sendMatrixEntry[it->first][i].first;
	  sendbuf[i * 2 + 1] = sendMatrixEntry[it->first][i].second;
	}
	sendBuffers.push_back(sendbuf);
      } else {
	sendBuffers.push_back(NULL);
      }
    }

    std::vector<int> recvSize;
    recvSize.reserve(sendDofs.size());
    
    int i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it)
      request[requestCounter++] = 
	mpiComm.Irecv(&(recvSize[i++]), 1, MPI_INT, it->first, 0);

    MPI::Request::Waitall(requestCounter, request);

    requestCounter = 0;

    i = 0;
    for (RankToDofContainer::iterator it = recvDofs.begin(); 
	 it != recvDofs.end(); ++it) {
      int nSend = sendMatrixEntry[it->first].size();

      if (nSend > 0)
	request[requestCounter++] = 
	  mpiComm.Isend(sendBuffers[i], nSend * 2, MPI_INT, it->first, 0);

      i++;
    }

    std::vector<int*> recvBuffers;
    recvBuffers.reserve(sendDofs.size());
    
    i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it) {
      if (recvSize[i] > 0) {
	int *recvbuf = new int[recvSize[i] * 2];
	recvBuffers.push_back(recvbuf);

	request[requestCounter++] =
	  mpiComm.Irecv(recvbuf, recvSize[i] * 2, MPI_INT, it->first, 0);
      } else {
	recvBuffers.push_back(NULL);
      }

      i++;
    }

    MPI::Request::Waitall(requestCounter, request);

    for (int j = 0; j < static_cast<int>(sendBuffers.size()); j++)
      if (sendBuffers[j])
	delete [] sendBuffers[j];

    i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it) {
      if (recvSize[i] > 0) {
	for (int j = 0; j < recvSize[i]; j++) {
	  int r = recvBuffers[i][j * 2];
	  int c = recvBuffers[i][j * 2 + 1];

	  r -= rstart * nComponents;

	  TEST_EXIT_DBG(r >= 0 && r < nRankRows)("Should not happen!\n");
	  
	  if (c < rstart * nComponents || c >= rstart * nComponents + nRankRows)
	    o_nnz[r]++;
	  else
	    d_nnz[r]++;
	}

	delete [] recvBuffers[i];
      }

      i++;
    }

    MatCreateMPIAIJ(PETSC_COMM_WORLD, nRankRows, nRankRows, nOverallRows, nOverallRows,
		    0, d_nnz, 0, o_nnz, &petscMatrix);

#if (DEBUG != 0)
    int a, b;
    MatGetOwnershipRange(petscMatrix, &a, &b);
    TEST_EXIT(a == rstart * nComponents)("Wrong matrix ownership range!\n");
    TEST_EXIT(b == rstart * nComponents + nRankRows)("Wrong matrix ownership range!\n");
#endif
    using mtl::tag::major; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits= mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    for (int i = 0; i < nComponents; i++)
      for (int j = 0; j < nComponents; j++)
	if ((*mat)[i][j])
	  setDofMatrix((*mat)[i][j], nComponents, i, j);

    MatAssemblyBegin(petscMatrix, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(petscMatrix, MAT_FINAL_ASSEMBLY);

    for (int i = 0; i < nComponents; i++)
      setDofVector(petscRhsVec, vec->getDOFVector(i), nComponents, i);

    VecAssemblyBegin(petscRhsVec);
    VecAssemblyEnd(petscRhsVec);

    INFO(info, 8)("Filling the petsc matrix needed %.5f seconds\n", TIME_USED(first, clock()));
  }
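
  // Illustrative sketch (not part of the original implementation): the preallocation
  // computed above classifies every non-zero entry of a locally owned row r as part of
  // the diagonal block if its global column index c lies in this rank's contiguous row
  // range [rstart * nComponents, rstart * nComponents + nRankRows), and as part of the
  // off-diagonal block otherwise; these counts are passed to MatCreateMPIAIJ as d_nnz
  // and o_nnz:
  //
  //   bool inDiagonalBlock =
  //     (c >= rstart * nComponents && c < rstart * nComponents + nRankRows);
  //   if (inDiagonalBlock)
  //     d_nnz[r]++;
  //   else
  //     o_nnz[r]++;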


  void ParallelDomainBase::solvePetscMatrix(DOFVector<double> *vec)
  {
    FUNCNAME("ParallelDomainBase::solvePetscMatrix()");

    ERROR_EXIT("Not yet tested for scalar problem definition!\n");

    KSP ksp;
    KSPCreate(PETSC_COMM_WORLD, &ksp);
    KSPSetOperators(ksp, petscMatrix, petscMatrix, SAME_NONZERO_PATTERN);
    KSPSetTolerances(ksp, 1.e-7, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT);
    KSPSetType(ksp, KSPBCGS);
    KSPMonitorSet(ksp, myKSPMonitor, PETSC_NULL, 0);
    KSPSolve(ksp, petscRhsVec, petscSolVec);

    PetscScalar *vecPointer;
    VecGetArray(petscSolVec, &vecPointer);

    for (int i = 0; i < nRankDofs; i++)
      (*vec)[mapLocalToDofIndex[i]] = vecPointer[i];

    VecRestoreArray(petscSolVec, &vecPointer);

    std::vector<double*> sendBuffers(sendDofs.size());
    std::vector<double*> recvBuffers(recvDofs.size());

    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;

    int i = 0;
    for (RankToDofContainer::iterator sendIt = sendDofs.begin();
	 sendIt != sendDofs.end(); ++sendIt, i++) {
      int nSendDOFs = sendIt->second.size();
      sendBuffers[i] = new double[nSendDOFs];

      for (int j = 0; j < nSendDOFs; j++)
	sendBuffers[i][j] = (*vec)[*((sendIt->second)[j])];

      request[requestCounter++] =
	mpiComm.Isend(sendBuffers[i], nSendDOFs, MPI_DOUBLE, sendIt->first, 0);
    }

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size();
      recvBuffers[i] = new double[nRecvDOFs];

      request[requestCounter++] =
	mpiComm.Irecv(recvBuffers[i], nRecvDOFs, MPI_DOUBLE, recvIt->first, 0);
    }


    MPI::Request::Waitall(requestCounter, request);

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      for (int j = 0; j < static_cast<int>(recvIt->second.size()); j++)
	(*vec)[*(recvIt->second)[j]] = recvBuffers[i][j];

      delete [] recvBuffers[i];
    }
    
    for (int i = 0; i < static_cast<int>(sendBuffers.size()); i++)
      delete [] sendBuffers[i];

    MatDestroy(petscMatrix);
    VecDestroy(petscRhsVec);
    VecDestroy(petscSolVec);
    VecDestroy(petscTmpVec);
  }


  void ParallelDomainBase::solvePetscMatrix(SystemVector &vec)
  {
    FUNCNAME("ParallelDomainBase::solvePetscMatrix()");

#if 0
    // Set the old solution to be the initial guess for the petsc solver.
    for (int i = 0; i < nComponents; i++)
      setDofVector(petscSolVec, vec->getDOFVector(i), nComponents, i);

    VecAssemblyBegin(petscSolVec);
    VecAssemblyEnd(petscSolVec);
#endif

    // === Init Petsc solver. ===

    KSP solver;
    KSPCreate(PETSC_COMM_WORLD, &solver);
    KSPSetOperators(solver, petscMatrix, petscMatrix, SAME_NONZERO_PATTERN); 
    KSPSetTolerances(solver, 0.0, 1e-8, PETSC_DEFAULT, PETSC_DEFAULT);
    KSPSetType(solver, KSPBCGS);
    KSPMonitorSet(solver, myKSPMonitor, PETSC_NULL, 0);
    KSPSetFromOptions(solver);
    // Do not delete the solution vector, use it for the initial guess.
    //    KSPSetInitialGuessNonzero(solver, PETSC_TRUE);

    // === Run Petsc. ===

    KSPSolve(solver, petscRhsVec, petscSolVec);

    // === Transfer values from petsc's solution vector to the dof vectors. ===
    PetscScalar *vecPointer;
    VecGetArray(petscSolVec, &vecPointer);

    for (int i = 0; i < nComponents; i++) {
      DOFVector<double> *dofvec = vec.getDOFVector(i);
      for (int j = 0; j < nRankDofs; j++)
	(*dofvec)[mapLocalToDofIndex[j]] = vecPointer[j * nComponents + i];      
    }

    VecRestoreArray(petscSolVec, &vecPointer);


    // === Synchronize the values at common dofs, i.e., dofs that correspond to ===
    // === more than one partition.                                             ===
    synchVectors(vec);

    // === Print information about solution process. ===

    int iterations = 0;
    KSPGetIterationNumber(solver, &iterations);
    MSG("  Number of iterations: %d\n", iterations);
    
    double norm = 0.0;
    MatMult(petscMatrix, petscSolVec, petscTmpVec);
    VecAXPY(petscTmpVec, -1.0, petscRhsVec);
    VecNorm(petscTmpVec, NORM_2, &norm);
    MSG("  Residual norm: %e\n", norm);


    // === Destroy Petsc's variables. ===

    MatDestroy(petscMatrix);
    VecDestroy(petscRhsVec);
    VecDestroy(petscSolVec);
    VecDestroy(petscTmpVec);
    KSPDestroy(solver);
  }
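
  // Illustrative note (added for clarity, not part of the original code): the petsc
  // solution vector stores the components of the system interleaved per DOF, so the
  // value of component i at the j-th locally owned DOF is read as
  //
  //   double v = vecPointer[j * nComponents + i];
  //   (*vec.getDOFVector(i))[mapLocalToDofIndex[j]] = v;
  //
  // which is exactly the copy loop used in solvePetscMatrix() above.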
  
  void ParallelDomainBase::synchVectors(SystemVector &vec)
  {
    std::vector<double*> sendBuffers(sendDofs.size());
    std::vector<double*> recvBuffers(recvDofs.size());

    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;
    
    int i = 0;
    for (RankToDofContainer::iterator sendIt = sendDofs.begin();
	 sendIt != sendDofs.end(); ++sendIt, i++) {
      int nSendDOFs = sendIt->second.size();
      sendBuffers[i] = new double[nSendDOFs * nComponents];

      int counter = 0;
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
	for (int k = 0; k < nSendDOFs; k++)
	  sendBuffers[i][counter++] = (*dofvec)[*((sendIt->second)[k])];
      }

      request[requestCounter++] = mpiComm.Isend(sendBuffers[i], nSendDOFs * nComponents,
						MPI_DOUBLE, sendIt->first, 0);
    }

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size() * nComponents;
      recvBuffers[i] = new double[nRecvDOFs];

      request[requestCounter++] =
	mpiComm.Irecv(recvBuffers[i], nRecvDOFs, MPI_DOUBLE, recvIt->first, 0);
    }


    MPI::Request::Waitall(requestCounter, request);

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size();

      int counter = 0;
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
	for (int k = 0; k < nRecvDOFs; k++)
	  (*dofvec)[*(recvIt->second)[k]] = recvBuffers[i][counter++];
      }

      delete [] recvBuffers[i];
    }
    
    for (int i = 0; i < static_cast<int>(sendBuffers.size()); i++)
      delete [] sendBuffers[i];
  }
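
  // The communication in synchVectors() follows the non-blocking exchange pattern used
  // throughout this file. A minimal sketch with a hypothetical single neighbour rank
  // and buffers sendBuf/recvBuf of length n:
  //
  //   MPI::Request request[2];
  //   int requestCounter = 0;
  //   request[requestCounter++] = mpiComm.Isend(sendBuf, n, MPI_DOUBLE, neighbourRank, 0);
  //   request[requestCounter++] = mpiComm.Irecv(recvBuf, n, MPI_DOUBLE, neighbourRank, 0);
  //   MPI::Request::Waitall(requestCounter, request);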


  void ParallelDomainBase::serialize(std::ostream &out, DofContainer &data)
  {
    int vecSize = data.size();
    SerUtil::serialize(out, vecSize);
    for (int i = 0; i < vecSize; i++) {
      int dofIndex = (*data[i]);
      SerUtil::serialize(out, dofIndex);
    }
  }


  void ParallelDomainBase::deserialize(std::istream &in, DofContainer &data,
				       std::map<int, const DegreeOfFreedom*> &dofMap)
  {
    FUNCNAME("ParallelDomainBase::deserialize()");

    int vecSize = 0;
    SerUtil::deserialize(in, vecSize);
    data.resize(vecSize);
    for (int i = 0; i < vecSize; i++) {
      int dofIndex = 0;
      SerUtil::deserialize(in, dofIndex);

      TEST_EXIT_DBG(dofMap.count(dofIndex) != 0)
	("Dof index could not be deserialized correctly!\n");

      data[i] = dofMap[dofIndex];
    }
  }


  void ParallelDomainBase::serialize(std::ostream &out, RankToDofContainer &data)
  {
    int mapSize = data.size();
    SerUtil::serialize(out, mapSize);
    for (RankToDofContainer::iterator it = data.begin(); it != data.end(); ++it) {
      int rank = it->first;
      SerUtil::serialize(out, rank);
      serialize(out, it->second);
    }
  }

  
  void ParallelDomainBase::deserialize(std::istream &in, RankToDofContainer &data,
				       std::map<int, const DegreeOfFreedom*> &dofMap)
  {
    int mapSize = 0;
    SerUtil::deserialize(in, mapSize);
    for (int i = 0; i < mapSize; i++) {
      int rank = 0;
      SerUtil::deserialize(in, rank);
      deserialize(in, data[rank], dofMap);      
    }
  }

  double ParallelDomainBase::setElemWeights(AdaptInfo *adaptInfo) 
  {
    double localWeightSum = 0.0;
    int elNum = -1;

    elemWeights.clear();

    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, -1,
					 Mesh::CALL_EVERY_EL_PREORDER);
    while (elInfo) {
      Element *element = elInfo->getElement();

      // get partition data
      PartitionElementData *partitionData = dynamic_cast<PartitionElementData*>
	(element->getElementData(PARTITION_ED));

      if (partitionData && partitionData->getPartitionStatus() == IN) {
	if (partitionData->getLevel() == 0)
	  elNum = element->getIndex();
	
	TEST_EXIT_DBG(elNum != -1)("invalid element number\n");
	if (element->isLeaf()) {
	  elemWeights[elNum] += 1.0;
	  localWeightSum += 1.0;
	}
      }

      elInfo = stack.traverseNext(elInfo);
    }

    return localWeightSum;
  }

  void ParallelDomainBase::partitionMesh(AdaptInfo *adaptInfo)
  {
    if (initialPartitionMesh) {
      initialPartitionMesh = false;
      partitioner->fillCoarsePartitionVec(&oldPartitionVec);
      partitioner->partition(&elemWeights, INITIAL);
    } else {
      oldPartitionVec = partitionVec;
      partitioner->partition(&elemWeights, ADAPTIVE_REPART, 100.0 /*0.000001*/);
    }    

    partitioner->fillCoarsePartitionVec(&partitionVec);
  }


  void ParallelDomainBase::createInteriorBoundaryInfo()
  {
    FUNCNAME("ParallelDomainBase::createInteriorBoundaryInfo()");

    // To be general, we have to use elInfo->getBoundary(EDGE/FACE, i) instead of
    // elInfo->getBoundary(i).
    TEST_EXIT(mesh->getDim() == 2)
      ("getBoundary(i) always returns the i-th edge, generalize for 3d!\n");

    // === First, traverse the mesh and search for all elements that have a   ===
    // === boundary with an element within another partition.                 ===

    TraverseStack stack;
    ElInfo *elInfo = 
      stack.traverseFirst(mesh, -1, 
			  Mesh::CALL_LEAF_EL | Mesh::FILL_NEIGH | Mesh::FILL_BOUND);
    while (elInfo) {
      Element *element = elInfo->getElement();

      PartitionElementData *partitionData = 
	dynamic_cast<PartitionElementData*>(element->getElementData(PARTITION_ED));   

      // Check if the element is within the rank's partition.
      if (partitionData->getPartitionStatus() == IN) {
	for (int i = 0; i < 3; i++) {
	  if (!elInfo->getNeighbour(i))
	    continue;

	  PartitionElementData *neighbourPartitionData =
	    dynamic_cast<PartitionElementData*>(elInfo->getNeighbour(i)->getElementData(PARTITION_ED));

	  if (neighbourPartitionData->getPartitionStatus() == OUT) {

	    // We have found an element that is in the rank's partition, but has a
	    // neighbour outside of the rank's partition.

	    // === Create information about the boundary between the two elements. ===

	    AtomicBoundary bound;
	    bound.rankObject.el = element;
	    bound.rankObject.elIndex = element->getIndex();
	    bound.rankObject.subObjAtBoundary = EDGE;
	    bound.rankObject.ithObjAtBoundary = i;

	    // Do not set a pointer to the element, because it will be deleted from
	    // the mesh after partitioning and the pointer would become invalid.
	    bound.neighbourObject.el = NULL;
	    bound.neighbourObject.elIndex = elInfo->getNeighbour(i)->getIndex();
	    bound.neighbourObject.subObjAtBoundary = EDGE;
	    bound.neighbourObject.ithObjAtBoundary = elInfo->getSideOfNeighbour(i);
	    
	    // i == 2  =>  getSideOfNeighbour(i) == 2
	    TEST_EXIT_DBG(i != 2 || elInfo->getSideOfNeighbour(i) == 2)
	      ("Should not happen!\n");

	    // Get rank number of the neighbouring element.
	    int otherElementRank = partitionVec[elInfo->getNeighbour(i)->getIndex()];


	    // === Add the boundary information object to the corresponding overall ===
	    // === boundary. There are three possibilities: if the boundary is a    ===
	    // === periodic boundary, just add it to \ref periodicBoundary. Here it ===
	    // === does not matter which rank is responsible for this boundary.     ===
	    // === Otherwise, the boundary is added either to \ref myIntBoundary or ===
	    // === to \ref otherIntBoundary, depending on which rank is responsible ===
	    // === for this boundary.                                               ===

	    if (BoundaryManager::isBoundaryPeriodic(elInfo->getBoundary(i))) {	      
	      // We have found an element that is at an interior, periodic boundary.
	      AtomicBoundary& b = periodicBoundary.getNewAtomic(otherElementRank);
	      b = bound;
	    } else {
	      // We have found an element that is at an interior, non-periodic boundary.
	      AtomicBoundary& b = ((mpiRank > otherElementRank) ?
				   myIntBoundary.getNewAtomic(otherElementRank) :
				   otherIntBoundary.getNewAtomic(otherElementRank));
	      b = bound;	      
	    }
 	  }
	}
      }

      elInfo = stack.traverseNext(elInfo);
    }

    // === Once we have this information, we must care about the order of the atomic ===
    // === bounds in the three boundary handling objects. In the end, all the bound- ===
    // === aries have to be in the same order on both ranks that share the boundary. ===
    std::vector<int*> sendBuffers, recvBuffers;

    // First we communicate myInt/otherIntBoundary, and then the periodic boundaries.
    MPI::Request request[max(myIntBoundary.boundary.size() + 
			     otherIntBoundary.boundary.size(),
			     periodicBoundary.boundary.size())];
    int requestCounter = 0;


    // === The owners of the boundaries send their atomic boundary order to the ===
    // === neighbouring ranks.                                                  ===

    for (RankToBoundMap::iterator rankIt = myIntBoundary.boundary.begin();
	 rankIt != myIntBoundary.boundary.end(); ++rankIt) {    
      int nSendInt = rankIt->second.size();
      int* buffer = new int[nSendInt * 2];
      for (int i = 0; i < nSendInt; i++) {
	buffer[i * 2] = (rankIt->second)[i].rankObject.elIndex;
	buffer[i * 2 + 1] = (rankIt->second)[i].rankObject.ithObjAtBoundary;
      }
      sendBuffers.push_back(buffer);
      
      request[requestCounter++] =
	mpiComm.Isend(buffer, nSendInt * 2, MPI_INT, rankIt->first, 0);
    }

    for (RankToBoundMap::iterator rankIt = otherIntBoundary.boundary.begin();
	 rankIt != otherIntBoundary.boundary.end(); ++rankIt) {
      int nRecvInt = rankIt->second.size();
      int *buffer = new int[nRecvInt * 2];
      recvBuffers.push_back(buffer);

      request[requestCounter++] = 
	mpiComm.Irecv(buffer, nRecvInt * 2, MPI_INT, rankIt->first, 0);
    }


    MPI::Request::Waitall(requestCounter, request);

    
    // === The information about all neighbouring boundaries has been received. So ===
    // === the rank tests if its own atomic boundaries are in the same order. If   ===
    // === not, the atomic boundaries are swapped to the correct order.            ===

    int i<