#include <algorithm>

#include "ParallelDomainBase.h"
#include "ParMetisPartitioner.h"
#include "Mesh.h"
#include "Traverse.h"
#include "ElInfo.h"
#include "Element.h"
#include "MacroElement.h"
#include "PartitionElementData.h"
#include "DOFMatrix.h"
#include "DOFVector.h"
#include "SystemVector.h"
#include "VtkWriter.h"
#include "ElementDofIterator.h"
#include "ProblemStatBase.h"
#include "StandardProblemIteration.h"
#include "ElementFileWriter.h"
#include "VertexVector.h"

#include "petscksp.h"

namespace AMDiS {

  PetscErrorCode myKSPMonitor(KSP ksp, PetscInt iter, PetscReal rnorm, void *)
  {    
    if (iter % 100 == 0 && MPI::COMM_WORLD.Get_rank() == 0)
      std::cout << "  Petsc-Iteration " << iter << ": " << rnorm << std::endl;

    return 0;
  }


  inline bool cmpDofsByValue(const DegreeOfFreedom* dof1, const DegreeOfFreedom* dof2)
  {
    return (*dof1 < *dof2);
  }

  ParallelDomainBase::ParallelDomainBase(ProblemIterationInterface *iIF,
					 ProblemTimeInterface *tIF,
					 FiniteElemSpace *fe,
					 RefinementManager *refineManager)
    : iterationIF(iIF),
      timeIF(tIF),
      name(iIF->getName()),
      feSpace(fe),
      mesh(fe->getMesh()),
      refinementManager(refineManager),
      initialPartitionMesh(true),
      nRankDofs(0),
      rstart(0),
      nComponents(1),
      deserialized(false)
  {
    FUNCNAME("ParallelDomainBase::ParallelDomainBase()");

    TEST_EXIT(mesh->getNumberOfDOFAdmin() == 1)
      ("Only meshes with one DOFAdmin are supported!\n");
    TEST_EXIT(mesh->getDOFAdmin(0).getNumberOfPreDOFs(0) == 0)
      ("Wrong pre dof number for DOFAdmin!\n");

    mpiRank = MPI::COMM_WORLD.Get_rank();
    mpiSize = MPI::COMM_WORLD.Get_size();
    mpiComm = MPI::COMM_WORLD;
    partitioner = new ParMetisPartitioner(mesh, &mpiComm);
  }

  void ParallelDomainBase::initParallelization(AdaptInfo *adaptInfo)
  {
    FUNCNAME("ParallelDomainBase::initParallelization()");

    TEST_EXIT(mpiSize > 1)
      ("Parallelization does not work with only one process!\n");

    // If the problem has already been deserialized from a file, we do not need
    // to do anything.
    if (deserialized)
      return;

    // Test if the mesh consists of macro elements only. Partitioning of the mesh
    // is supported only for macro meshes, so it will not work if the mesh has
    // already been refined in some way.
    testForMacroMesh();

    // Create an initial partitioning of the mesh,
    partitioner->createPartitionData();
    // set the element weights, which are all 1 at the very beginning,
    setElemWeights(adaptInfo);
    // and now partition the mesh.
    partitionMesh(adaptInfo);

#if (DEBUG != 0)
    ElementIdxToDofs elMap;
    DbgCreateElementMap(elMap);

    if (mpiRank == 0)
      writePartitioningMesh("part.vtu");
#endif

    // === Create new global and local DOF numbering. ===

    // Set of all DOFs of the rank.
    std::vector<const DegreeOfFreedom*> rankDofs;
    // Number of DOFs in rank's partition that are owned by the rank.
    nRankDofs = 0;
    // Number of all DOFs in the macro mesh.
    int nOverallDOFs = 0;

    createLocalGlobalNumbering(rankDofs, nRankDofs, nOverallDOFs);


    // === Create interior boundary information. ===

    createInteriorBoundaryInfo();

    // === Remove all macro elements that are not part of the rank's partition. ===

    removeMacroElements();

#if (DEBUG != 0)
    DbgTestElementMap(elMap);
    DbgTestInteriorBoundary();
#endif

    // === Reset all DOFAdmins of the mesh. ===

    updateDofAdmins();


    // === Create periodic dof mapping, if there are periodic boundaries. ===

    createPeriodicMap();


    // === Global refinements. ===

    int globalRefinement = 0;
    GET_PARAMETER(0, "testgr", "%d", &globalRefinement);

    if (globalRefinement > 0) {
      refinementManager->globalRefine(mesh, globalRefinement);

#if (DEBUG != 0)
      elMap.clear();
      DbgCreateElementMap(elMap);
#endif

      updateLocalGlobalNumbering(nRankDofs, nOverallDOFs);

      updateDofAdmins();

#if (DEBUG != 0)
      DbgTestElementMap(elMap);
#endif

      // === Update periodic mapping, if there are periodic boundaries. ===

      createPeriodicMap();
    }

    //  exit(0);

#if (DEBUG != 0)
    DbgTestCommonDofs(true);
#endif

    nRankRows = nRankDofs * nComponents;
    nOverallRows = nOverallDOFs * nComponents;
  }


  void ParallelDomainBase::exitParallelization(AdaptInfo *adaptInfo)
  {}

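  // Marks the first mapLocalGlobalDOFs.size() DOFs of every DOFAdmin as used and
  // all remaining DOFs as free, and updates the admin's used size, used count and
  // first hole accordingly.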
  
  void ParallelDomainBase::updateDofAdmins()
  {
    int nAdmins = mesh->getNumberOfDOFAdmin();
    for (int i = 0; i < nAdmins; i++) {
      DOFAdmin& admin = const_cast<DOFAdmin&>(mesh->getDOFAdmin(i));
      
      for (int j = 0; j < admin.getSize(); j++)
	admin.setDOFFree(j, true);
      for (int j = 0; j < static_cast<int>(mapLocalGlobalDOFs.size()); j++)
 	admin.setDOFFree(j, false);

      admin.setUsedSize(mapLocalGlobalDOFs.size());
      admin.setUsedCount(mapLocalGlobalDOFs.size());
      admin.setFirstHole(mapLocalGlobalDOFs.size());
    }
  }

  void ParallelDomainBase::testForMacroMesh()
  {
    FUNCNAME("ParallelDomainBase::testForMacroMesh()");

    int nMacroElements = 0;

    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, -1, Mesh::CALL_LEAF_EL);
    while (elInfo) {
      TEST_EXIT(elInfo->getLevel() == 0)
	("Mesh is already refined! This does not work with parallelization!\n");

      nMacroElements++;

      elInfo = stack.traverseNext(elInfo);
    }

    TEST_EXIT(nMacroElements >= mpiSize)
      ("The mesh has less macro elements than number of mpi processes!\n");
  }


  void ParallelDomainBase::setDofMatrix(DOFMatrix* mat, int dispMult, 
					int dispAddRow, int dispAddCol)
  {
    FUNCNAME("ParallelDomainBase::setDofMatrix()");

    TEST_EXIT(mat)("No DOFMatrix!\n");

    using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits= mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    traits::col<Matrix>::type col(mat->getBaseMatrix());
    traits::const_value<Matrix>::type value(mat->getBaseMatrix());

    typedef traits::range_generator<row, Matrix>::type cursor_type;
    typedef traits::range_generator<nz, cursor_type>::type icursor_type;

    std::vector<int> cols;
    std::vector<double> values;
    cols.reserve(300);
    values.reserve(300);

    // === Traverse all rows of the dof matrix and insert the values row by row ===
    // === into the petsc matrix.                                               ===

    for (cursor_type cursor = begin<row>(mat->getBaseMatrix()), 
	   cend = end<row>(mat->getBaseMatrix()); cursor != cend; ++cursor) {

      cols.clear();
      values.clear();

      // Global index of the current row dof.
      int globalRowDof = mapLocalGlobalDOFs[*cursor];
      // Test if the current row dof is a periodic dof.
      bool periodicRow = (periodicDof.count(globalRowDof) > 0);


      // === Traverse all non-zero entries of the row and produce vector cols  ===
      // === with the column indices of all row entries and vector values with ===
      // === the corresponding values.                                         ===

      for (icursor_type icursor = begin<nz>(cursor), icend = end<nz>(cursor); 
	   icursor != icend; ++icursor) {

	// Insert only non-zero values.
	if (value(*icursor) != 0.0) {
	  // Global index of the current column dof.
	  int globalColDof = mapLocalGlobalDOFs[col(*icursor)];
	  // Calculate the exact position of the column index in the petsc matrix.
	  int colIndex = globalColDof * dispMult + dispAddCol;

	  // If the current row is not periodic, but the current dof index is periodic,
	  // we have to duplicate the value to the other corresponding periodic columns.
	  if (periodicRow == false && periodicDof.count(globalColDof) > 0) {
	    // The value is assigned to n matrix entries, therefore every entry 
	    // gets only 1/n of the original value.
	    double scalFactor = 1.0 / (periodicDof[globalColDof].size() + 1.0);

	    // Insert the original entry.
	    cols.push_back(colIndex);
	    values.push_back(value(*icursor) * scalFactor);

	    // Insert the periodic entries.
	    for (std::set<DegreeOfFreedom>::iterator it = 
		   periodicDof[globalColDof].begin();
		 it != periodicDof[globalColDof].end(); ++it) {
	      cols.push_back(*it * dispMult + dispAddCol);
	      values.push_back(value(*icursor) * scalFactor);
	    }
	  } else {
	    // Neither row nor column dof index is periodic, so simply add the entry.
	    cols.push_back(colIndex);
	    values.push_back(value(*icursor));
	  }
	}
      }


      // === Up to now we have assembled one row. Now the row must be sent to the ===
      // === corresponding rows of the petsc matrix.                              ===

      // Calculate the petsc row index.
      int rowIndex = globalRowDof * dispMult + dispAddRow;
      
      if (periodicRow) {
	// The row dof is periodic, so the row must be sent to all corresponding rows.

	double scalFactor = 1.0 / (periodicDof[globalRowDof].size() + 1.0);
	
	int diagIndex = -1;
	for (int i = 0; i < static_cast<int>(values.size()); i++) {
	  // Scale only the non-diagonal values of the row. For the diagonal test
	  // we compare the global dof indices of the dof matrix (not of the petsc
	  // matrix!).
	  if ((cols[i] - dispAddCol) / dispMult != globalRowDof)
	    values[i] *= scalFactor;
	  else
	    diagIndex = i;
	}
	
	// Send the main row to the petsc matrix.
	MatSetValues(petscMatrix, 1, &rowIndex, cols.size(), 
		     &(cols[0]), &(values[0]), ADD_VALUES);	
 
	// Set the diagonal element to zero, i.e., the diagonal element of the current
	// row is not sent to the periodic row indices.
	if (diagIndex != -1)
	  values[diagIndex] = 0.0;

	// Send the row to all periodic row indices.
	for (std::set<DegreeOfFreedom>::iterator it = periodicDof[globalRowDof].begin();
	     it != periodicDof[globalRowDof].end(); ++it) {
	  int perRowIndex = *it * dispMult + dispAddRow;
	  MatSetValues(petscMatrix, 1, &perRowIndex, cols.size(), 
		       &(cols[0]), &(values[0]), ADD_VALUES);
	}

      } else {
	// The row dof is not periodic, simply send the row to the petsc matrix.
	MatSetValues(petscMatrix, 1, &rowIndex, cols.size(), 
		     &(cols[0]), &(values[0]), ADD_VALUES);
      }    
    }
  }


  void ParallelDomainBase::setDofVector(Vec& petscVec, DOFVector<double>* vec, 
					int dispMult, int dispAdd)
  {
    // Traverse all used dofs in the dof vector.
    DOFVector<double>::Iterator dofIt(vec, USED_DOFS);
    for (dofIt.reset(); !dofIt.end(); ++dofIt) {
      // Calculate global row index of the dof.
      int globalRow = mapLocalGlobalDOFs[dofIt.getDOFIndex()];
      // Calculate petsc index of the row dof.
      int index = globalRow * dispMult + dispAdd;

      if (periodicDof.count(globalRow) > 0) {
	// The dof index is periodic, so distribute the value to all corresponding
	// dof entries.

	double value = *dofIt / (periodicDof[globalRow].size() + 1.0);

	VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);

	for (std::set<DegreeOfFreedom>::iterator it = periodicDof[globalRow].begin();
	     it != periodicDof[globalRow].end(); ++it) {
	  index = *it * dispMult + dispAdd;
	  VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);
	}

      } else {
	// The dof index is not periodic.
	double value = *dofIt;
	VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);
      }
    }    
  }


  void ParallelDomainBase::fillPetscMatrix(DOFMatrix *mat, DOFVector<double> *vec)
  {
    FUNCNAME("ParallelDomainBase::fillPetscMatrix()");

    ERROR_EXIT("Not yet tested for scalar problem definition!\n");

    MatCreate(PETSC_COMM_WORLD, &petscMatrix);
    MatSetSizes(petscMatrix, nRankRows, nRankRows, nOverallRows, nOverallRows);
    MatSetType(petscMatrix, MATAIJ);

    VecCreate(PETSC_COMM_WORLD, &petscRhsVec);
    VecSetSizes(petscRhsVec, nRankRows, nOverallRows);
    VecSetType(petscRhsVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscSolVec);
    VecSetSizes(petscSolVec, nRankRows, nOverallRows);
    VecSetType(petscSolVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscTmpVec);
    VecSetSizes(petscTmpVec, nRankRows, nOverallRows);
    VecSetType(petscTmpVec, VECMPI);

    setDofMatrix(mat);

    MatAssemblyBegin(petscMatrix, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(petscMatrix, MAT_FINAL_ASSEMBLY);

    setDofVector(petscRhsVec, vec);

    VecAssemblyBegin(petscRhsVec);
    VecAssemblyEnd(petscRhsVec);
  }

  
  void ParallelDomainBase::fillPetscMatrix(Matrix<DOFMatrix*> *mat, SystemVector *vec)
  {
    FUNCNAME("ParallelDomainBase::fillPetscMatrix()");

    clock_t first = clock();

    VecCreate(PETSC_COMM_WORLD, &petscRhsVec);
    VecSetSizes(petscRhsVec, nRankRows, nOverallRows);
    VecSetType(petscRhsVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscSolVec);
    VecSetSizes(petscSolVec, nRankRows, nOverallRows);
    VecSetType(petscSolVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscTmpVec);
    VecSetSizes(petscTmpVec, nRankRows, nOverallRows);
    VecSetType(petscTmpVec, VECMPI);

    using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits= mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;
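    // Number of non-zero entries per local row, split into the diagonal block
    // (columns owned by this rank) and the off-diagonal block (columns owned by
    // other ranks). These arrays are used below to preallocate the parallel
    // petsc matrix.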

    int d_nnz[nRankRows];
    int o_nnz[nRankRows];

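    // For each neighbouring rank, collect the (row, column) indices of matrix
    // entries whose row dof is owned by that rank. These entries are sent to the
    // owning rank so that it can take them into account in its non-zero counts.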
    std::map<int, std::vector< std::pair<int, int> > > sendMatrixEntry;

    for (int i = 0; i < nRankRows; i++) {
      d_nnz[i] = 0;
      o_nnz[i] = 0;
    }

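    // Traverse all component blocks of the matrix. For every non-zero entry in a
    // row owned by this rank, decide whether its column lies in the diagonal or in
    // the off-diagonal block. Entries of rows owned by other ranks are collected
    // in sendMatrixEntry and communicated afterwards.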
    for (int i = 0; i < nComponents; i++) {
      for (int j = 0; j < nComponents; j++) {
 	if ((*mat)[i][j]) {
	  Matrix bmat = (*mat)[i][j]->getBaseMatrix();

	  traits::col<Matrix>::type col(bmat);
	  traits::const_value<Matrix>::type value(bmat);
	  
	  typedef traits::range_generator<row, Matrix>::type cursor_type;
	  typedef traits::range_generator<nz, cursor_type>::type icursor_type;
	  
	  for (cursor_type cursor = begin<row>(bmat), 
		 cend = end<row>(bmat); cursor != cend; ++cursor) {

	    int r = mapLocalGlobalDOFs[*cursor] * nComponents + i;

	    if (isRankDof[*cursor]) {
	      r -= rstart * nComponents;

#if (DEBUG != 0)    
	      if (r < 0 || r >= nRankRows) {
		std::cout << "ERROR in rank: " << mpiRank << std::endl;
		std::cout << "  Wrong r = " << r << " " << *cursor << " " 
			  << mapLocalGlobalDOFs[*cursor] << " " 
			  << nRankRows << std::endl;
		ERROR_EXIT("Should not happen!\n");
	      }
#endif
	      
	      for (icursor_type icursor = begin<nz>(cursor), 
		     icend = end<nz>(cursor); icursor != icend; ++icursor) {
		if (value(*icursor) != 0.0) {
		  int c = mapLocalGlobalDOFs[col(*icursor)] * nComponents + j;

		  if (c >= rstart * nComponents && 
		      c < rstart * nComponents + nRankRows)
		    d_nnz[r]++;
		  else
		    o_nnz[r]++;		  
		}    
	      }
	    } else {
	      int sendToRank = -1;

	      for (RankToDofContainer::iterator it = recvDofs.begin();
		   it != recvDofs.end(); ++it) {
		for (DofContainer::iterator dofIt = it->second.begin();
		     dofIt != it->second.end(); ++dofIt) {
		  if (**dofIt == *cursor) {
		    sendToRank = it->first;
		    break;
		  }
		}

		if (sendToRank != -1)
		  break;
	      }

	      TEST_EXIT_DBG(sendToRank != -1)("Should not happen!\n");

	      for (icursor_type icursor = begin<nz>(cursor), 
		     icend = end<nz>(cursor); icursor != icend; ++icursor) {
		if (value(*icursor) != 0.0) {
		  int c = mapLocalGlobalDOFs[col(*icursor)] * nComponents + j;
		  
		  sendMatrixEntry[sendToRank].push_back(std::make_pair(r, c));
		}
	      }
	    }
	  }
	}
      }
    }

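    // === Exchange the collected matrix entries with the neighbouring ranks:   ===
    // === first communicate the number of entries, then the (row, column)      ===
    // === pairs themselves.                                                    ===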
    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;

    std::vector<int*> sendBuffers;
    sendBuffers.reserve(recvDofs.size());

    for (RankToDofContainer::iterator it = recvDofs.begin(); 
	 it != recvDofs.end(); ++it) {
      int nSend = sendMatrixEntry[it->first].size();

      request[requestCounter++] = mpiComm.Isend(&nSend, 1, MPI_INT, it->first, 0);
      
      if (nSend > 0) {
	int *sendbuf = new int[nSend * 2];
	for (int i = 0; i < nSend; i++) {
	  sendbuf[i * 2] = sendMatrixEntry[it->first][i].first;
	  sendbuf[i * 2 + 1] = sendMatrixEntry[it->first][i].second;
	}
	sendBuffers.push_back(sendbuf);
      } else {
	sendBuffers.push_back(NULL);
      }
    }

    std::vector<int> recvSize;
    recvSize.reserve(sendDofs.size());
    
    int i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it)
      request[requestCounter++] = 
	mpiComm.Irecv(&(recvSize[i++]), 1, MPI_INT, it->first, 0);

    MPI::Request::Waitall(requestCounter, request);

    requestCounter = 0;

    i = 0;
    for (RankToDofContainer::iterator it = recvDofs.begin(); 
	 it != recvDofs.end(); ++it) {
      int nSend = sendMatrixEntry[it->first].size();

      if (nSend > 0)
	request[requestCounter++] = 
	  mpiComm.Isend(sendBuffers[i], nSend * 2, MPI_INT, it->first, 0);

      i++;
    }

    std::vector<int*> recvBuffers;
    recvBuffers.reserve(sendDofs.size());
    
    i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it) {
      if (recvSize[i] > 0) {
	int *recvbuf = new int[recvSize[i] * 2];
	recvBuffers.push_back(recvbuf);

	request[requestCounter++] =
	  mpiComm.Irecv(recvbuf, recvSize[i] * 2, MPI_INT, it->first, 0);
      } else {
	recvBuffers.push_back(NULL);
      }

      i++;
    }

    MPI::Request::Waitall(requestCounter, request);

    for (int j = 0; j < static_cast<int>(sendBuffers.size()); j++)
      if (sendBuffers[j])
 	delete [] sendBuffers[j];

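    // === Traverse the received (row, column) pairs and add them to the        ===
    // === non-zero counts of the corresponding local rows.                     ===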
    i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it) {
      if (recvSize[i] > 0) {
	for (int j = 0; j < recvSize[i]; j++) {
	  int r = recvBuffers[i][j * 2];
	  int c = recvBuffers[i][j * 2 + 1];

	  r -= rstart * nComponents;

	  TEST_EXIT_DBG(r >= 0 && r < nRankRows)("Should not happen!\n");
	  
	  if (c < rstart * nComponents || c >= rstart * nComponents + nRankRows)
	    o_nnz[r]++;
	  else
	    d_nnz[r]++;
	}

	delete [] recvBuffers[i];
      }

      i++;
    }

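    // Create the distributed petsc matrix using the computed non-zero structure
    // for preallocation.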
    MatCreateMPIAIJ(PETSC_COMM_WORLD, nRankRows, nRankRows, nOverallRows, nOverallRows,
		    0, d_nnz, 0, o_nnz, &petscMatrix);

#if (DEBUG != 0)
    int a, b;
    MatGetOwnershipRange(petscMatrix, &a, &b);
    TEST_EXIT(a == rstart * nComponents)("Wrong matrix ownership range!\n");
    TEST_EXIT(b == rstart * nComponents + nRankRows)("Wrong matrix ownership range!\n");
#endif

    using mtl::tag::major; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits= mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    for (int i = 0; i < nComponents; i++)
      for (int j = 0; j < nComponents; j++)
	if ((*mat)[i][j])
	  setDofMatrix((*mat)[i][j], nComponents, i, j);

    MatAssemblyBegin(petscMatrix, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(petscMatrix, MAT_FINAL_ASSEMBLY);

    for (int i = 0; i < nComponents; i++)
      setDofVector(petscRhsVec, vec->getDOFVector(i), nComponents, i);

    VecAssemblyBegin(petscRhsVec);
    VecAssemblyEnd(petscRhsVec);

    INFO(info, 8)("Fill petsc matrix needed %.5f seconds\n", TIME_USED(first, clock()));
  }


  void ParallelDomainBase::solvePetscMatrix(DOFVector<double> *vec)
  {
    FUNCNAME("ParallelDomainBase::solvePetscMatrix()");

    ERROR_EXIT("Not yet tested for scalar problem definition!\n");

    KSP ksp;
    KSPCreate(PETSC_COMM_WORLD, &ksp);
    KSPSetOperators(ksp, petscMatrix, petscMatrix, SAME_NONZERO_PATTERN);
    KSPSetTolerances(ksp, 1.e-7, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT);
    KSPSetType(ksp, KSPBCGS);
    KSPMonitorSet(ksp, myKSPMonitor, PETSC_NULL, 0);
    KSPSolve(ksp, petscRhsVec, petscSolVec);

    PetscScalar *vecPointer;
    VecGetArray(petscSolVec, &vecPointer);

    for (int i = 0; i < nRankDofs; i++)
      (*vec)[mapLocalToDofIndex[i]] = vecPointer[i];

    VecRestoreArray(petscSolVec, &vecPointer);

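    // === Send the solution values of dofs owned by this rank to the          ===
    // === neighbouring ranks that use them, and receive the values of dofs    ===
    // === owned by other ranks.                                               ===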
    std::vector<double*> sendBuffers(sendDofs.size());
    std::vector<double*> recvBuffers(recvDofs.size());

    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;
    
    int i = 0;
    for (RankToDofContainer::iterator sendIt = sendDofs.begin();
	 sendIt != sendDofs.end(); ++sendIt, i++) {
      int nSendDOFs = sendIt->second.size();
      sendBuffers[i] = new double[nSendDOFs];

      for (int j = 0; j < nSendDOFs; j++)
	sendBuffers[i][j] = (*vec)[*((sendIt->second)[j])];

      request[requestCounter++] =
	mpiComm.Isend(sendBuffers[i], nSendDOFs, MPI_DOUBLE, sendIt->first, 0);
    }

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size();
      recvBuffers[i] = new double[nRecvDOFs];

      request[requestCounter++] =
	mpiComm.Irecv(recvBuffers[i], nRecvDOFs, MPI_DOUBLE, recvIt->first, 0);
    }

    MPI::Request::Waitall(requestCounter, request);

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      for (int j = 0; j < static_cast<int>(recvIt->second.size()); j++)
	(*vec)[*(recvIt->second)[j]] = recvBuffers[i][j];

      delete [] recvBuffers[i];
    }
    
    for (int i = 0; i < static_cast<int>(sendBuffers.size()); i++)
      delete [] sendBuffers[i];

    MatDestroy(petscMatrix);
    VecDestroy(petscRhsVec);
    VecDestroy(petscSolVec);
    VecDestroy(petscTmpVec);
  }


  void ParallelDomainBase::solvePetscMatrix(SystemVector &vec)
  {
    FUNCNAME("ParallelDomainBase::solvePetscMatrix()");

#if 0
    // Set the old solution to be the initial guess for the petsc solver.
    for (int i = 0; i < nComponents; i++)
      setDofVector(petscSolVec, vec->getDOFVector(i), nComponents, i);

    VecAssemblyBegin(petscSolVec);
    VecAssemblyEnd(petscSolVec);
#endif

    // === Init Petsc solver. ===

    KSP solver;
    KSPCreate(PETSC_COMM_WORLD, &solver);
    KSPSetOperators(solver, petscMatrix, petscMatrix, SAME_NONZERO_PATTERN); 
    KSPSetTolerances(solver, 0.0, 1e-8, PETSC_DEFAULT, PETSC_DEFAULT);
    KSPSetType(solver, KSPBCGS);
    KSPMonitorSet(solver, myKSPMonitor, PETSC_NULL, 0);
    KSPSetFromOptions(solver);
    // Do not delete the solution vector, use it for the initial guess.
    //    KSPSetInitialGuessNonzero(solver, PETSC_TRUE);

    // === Run Petsc. ===

    KSPSolve(solver, petscRhsVec, petscSolVec);

    // === Transfer the values from petsc's solution vector to the dof vectors. ===

    PetscScalar *vecPointer;
    VecGetArray(petscSolVec, &vecPointer);

    for (int i = 0; i < nComponents; i++) {
      DOFVector<double> *dofvec = vec.getDOFVector(i);
      for (int j = 0; j < nRankDofs; j++)
	(*dofvec)[mapLocalToDofIndex[j]] = vecPointer[j * nComponents + i];
    }

    VecRestoreArray(petscSolVec, &vecPointer);


    // === Synchronize dofs at common dofs, i.e., dofs that correspond to more ===
    // === than one partition.                                                 ===
    synchVectors(vec);


    // === Print information about the solution process. ===

    int iterations = 0;
    KSPGetIterationNumber(solver, &iterations);
    MSG("  Number of iterations: %d\n", iterations);
    
    double norm = 0.0;
    MatMult(petscMatrix, petscSolVec, petscTmpVec);
    VecAXPY(petscTmpVec, -1.0, petscRhsVec);
    VecNorm(petscTmpVec, NORM_2, &norm);
    MSG("  Residual norm: %e\n", norm);


    // === Destroy Petsc's variables. ===

    MatDestroy(petscMatrix);
    VecDestroy(petscRhsVec);
    VecDestroy(petscSolVec);
    VecDestroy(petscTmpVec);
    KSPDestroy(solver);
  }
  
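  // Exchanges the values of dofs that are shared between ranks: each rank sends
  // the values of the dofs it owns (sendDofs) to the neighbouring ranks and
  // overwrites its copies of dofs owned by other ranks (recvDofs) with the
  // received values, for all components of the system vector.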
  void ParallelDomainBase::synchVectors(SystemVector &vec)
  {
    std::vector<double*> sendBuffers(sendDofs.size());
    std::vector<double*> recvBuffers(recvDofs.size());

    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;
    
    int i = 0;
    for (RankToDofContainer::iterator sendIt = sendDofs.begin();
	 sendIt != sendDofs.end(); ++sendIt, i++) {
      int nSendDOFs = sendIt->second.size();
      sendBuffers[i] = new double[nSendDOFs * nComponents];

      int counter = 0;
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
	for (int k = 0; k < nSendDOFs; k++)
	  sendBuffers[i][counter++] = (*dofvec)[*((sendIt->second)[k])];
      }

      request[requestCounter++] = mpiComm.Isend(sendBuffers[i], nSendDOFs * nComponents,
						MPI_DOUBLE, sendIt->first, 0);
    }

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size() * nComponents;
      recvBuffers[i] = new double[nRecvDOFs];

      request[requestCounter++] =
	mpiComm.Irecv(recvBuffers[i], nRecvDOFs, MPI_DOUBLE, recvIt->first, 0);
    }


    MPI::Request::Waitall(requestCounter, request);

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size();

      int counter = 0;
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
 	for (int k = 0; k < nRecvDOFs; k++)
	  (*dofvec)[*(recvIt->second)[k]] = recvBuffers[i][counter++];
      }

      delete [] recvBuffers[i];
    }
    
    for (int i = 0; i < static_cast<int>(sendBuffers.size()); i++)
      delete [] sendBuffers[i];
  }

  
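  // Writes the size of the dof container followed by the index of every dof to the
  // stream. The deserialize counterpart below restores the dof pointers from these
  // indices using the given index-to-pointer map.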
  void ParallelDomainBase::serialize(std::ostream &out, DofContainer &data)
  {
    int vecSize = data.size();
    SerUtil::serialize(out, vecSize);
    for (int i = 0; i < vecSize; i++) {
      int dofIndex = (*data[i]);
      SerUtil::serialize(out, dofIndex);
    }
  }


  void ParallelDomainBase::deserialize(std::istream &in, DofContainer &data,
				       std::map<int, const DegreeOfFreedom*> &dofMap)
  {
    FUNCNAME("ParallelDomainBase::deserialize()");

    int vecSize = 0;
    SerUtil::deserialize(in, vecSize);
    data.resize(vecSize);
    for (int i = 0; i < vecSize; i++) {
      int dofIndex = 0;
      SerUtil::deserialize(in, dofIndex);

      TEST_EXIT_DBG(dofMap.count(dofIndex) != 0)
	("Dof index could not be deserialized correctly!\n");

      data[i] = dofMap[dofIndex];
    }
  }


  void ParallelDomainBase::serialize(std::ostream &out, RankToDofContainer &data)
  {
    int mapSize = data.size();
    SerUtil::serialize(out, mapSize);
    for (RankToDofContainer::iterator it = data.begin(); it != data.end(); ++it) {
      int rank = it->first;
      SerUtil::serialize(out, rank);
      serialize(out, it->second);
    }
  }

  
  void ParallelDomainBase::deserialize(std::istream &in, RankToDofContainer &data,
				       std::map<int, const DegreeOfFreedom*> &dofMap)
  {
    int mapSize = 0;
    SerUtil::deserialize(in, mapSize);
    for (int i = 0; i < mapSize; i++) {
      int rank = 0;
      SerUtil::deserialize(in, rank);
      deserialize(in, data[rank], dofMap);      
    }
  }


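  // Assigns a weight of 1 to every leaf element within the rank's partition,
  // accumulated at the corresponding level-0 (macro) element, and returns the sum
  // of the local weights.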
  double ParallelDomainBase::setElemWeights(AdaptInfo *adaptInfo) 
  {
    double localWeightSum = 0.0;
    int elNum = -1;

    elemWeights.clear();

    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, -1,
					 Mesh::CALL_EVERY_EL_PREORDER);
    while (elInfo) {
      Element *element = elInfo->getElement();

      // get partition data
      PartitionElementData *partitionData = dynamic_cast<PartitionElementData*>
	(element->getElementData(PARTITION_ED));

      if (partitionData && partitionData->getPartitionStatus() == IN) {
	if (partitionData->getLevel() == 0)
	  elNum = element->getIndex();
	
	TEST_EXIT_DBG(elNum != -1)("invalid element number\n");
	if (element->isLeaf()) {
	  elemWeights[elNum] += 1.0;
	  localWeightSum += 1.0;
	}
      }

      elInfo = stack.traverseNext(elInfo);
    }

    return localWeightSum;
  }


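  // Computes the mesh partitioning: on the first call an initial ParMETIS partition
  // is created, afterwards an adaptive repartitioning based on the current element
  // weights is performed. The element-to-rank mapping is stored in partitionVec,
  // the previous mapping in oldPartitionVec.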
  void ParallelDomainBase::partitionMesh(AdaptInfo *adaptInfo)
  {
    if (initialPartitionMesh) {
      initialPartitionMesh = false;
      partitioner->fillCoarsePartitionVec(&oldPartitionVec);
      partitioner->partition(&elemWeights, INITIAL);
    } else {
      oldPartitionVec = partitionVec;
      partitioner->partition(&elemWeights, ADAPTIVE_REPART, 100.0 /*0.000001*/);
    }    

    partitioner->fillCoarsePartitionVec(&partitionVec);
  }


  void ParallelDomainBase::createInteriorBoundaryInfo()
  {
    FUNCNAME("ParallelDomainBase::createInteriorBoundaryInfo()");

    // To be general, we would have to use elInfo->getBoundary(EDGE/FACE, i) instead
    // of elInfo->getBoundary(i).
    TEST_EXIT(mesh->getDim() == 2)
      ("getBoundary(i) always returns the i-th edge, generalize for 3d!\n");

    // === First, traverse the mesh and search for all elements that have a   ===
    // === boundary with an element within another partition.                 ===

    TraverseStack stack;
    ElInfo *elInfo = 
      stack.traverseFirst(mesh, -1, 
			  Mesh::CALL_LEAF_EL | Mesh::FILL_NEIGH | Mesh::FILL_BOUND);
    while (elInfo) {
      Element *element = elInfo->getElement();

      PartitionElementData *partitionData = 
	dynamic_cast<PartitionElementData*>(element->getElementData(PARTITION_ED));   

      // Check if the element is within the rank's partition.
      if (partitionData->getPartitionStatus() == IN) {
	for (int i = 0; i < 3; i++) {
	  if (!elInfo->getNeighbour(i))
	    continue;

	  PartitionElementData *neighbourPartitionData =
	    dynamic_cast<PartitionElementData*>(elInfo->getNeighbour(i)->getElementData(PARTITION_ED));

 	  if (neighbourPartitionData->getPartitionStatus() == OUT) {

	    // We have found an element that is in the rank's partition, but has a 
	    // neighbour outside of the rank's partition.

	    // === Create information about the boundary between the two elements. ===

	    AtomicBoundary bound;	    	    
	    bound.rankObj.el = element;
	    bound.rankObj.elIndex = element->getIndex();
	    bound.rankObj.subObj = EDGE;
	    bound.rankObj.ithObj = i;

	    // Do not set a pointer to the neighbouring element, because it will be
	    // deleted from the mesh after partitioning and the pointer would become
	    // invalid.
	    bound.neighObj.el = NULL;
	    bound.neighObj.elIndex = elInfo->getNeighbour(i)->getIndex();
	    bound.neighObj.subObj = EDGE;
	    bound.neighObj.ithObj = elInfo->getSideOfNeighbour(i);
	    
	    // i == 2  =>  getSideOfNeighbour(i) == 2
	    TEST_EXIT_DBG(i != 2 || elInfo->getSideOfNeighbour(i) == 2)
	      ("Should not happen!\n");

	    // Get the rank number of the neighbouring element.
	    int otherElementRank = partitionVec[elInfo->getNeighbour(i)->getIndex()];


	    // === Add the boundary information object to the corresponding overall ===
	    // === boundary. There are three possibilities: if the boundary is a    ===
	    // === periodic boundary, just add it to \ref periodicBoundary. Here it ===
	    // === does not matter which rank is responsible for this boundary.     ===
	    // === Otherwise, the boundary is added either to \ref myIntBoundary or ===
	    // === to \ref otherIntBoundary, depending on which rank is responsible ===
	    // === for this boundary.                                               ===

	    if (BoundaryManager::isBoundaryPeriodic(elInfo->getBoundary(i))) {	      
	      // We have found an element that is at an interior, periodic boundary.
	      AtomicBoundary& b = periodicBoundary.getNewAtomic(otherElementRank);
	      b = bound;
	    } else {
	      // We have found an element that is at an interior, non-periodic boundary.
	      AtomicBoundary& b = ((mpiRank > otherElementRank) ?
				   myIntBoundary.getNewAtomic(otherElementRank) :
				   otherIntBoundary.getNewAtomic(otherElementRank));
	      b = bound;	      
	    }
 	  }
	}
      }

      elInfo = stack.traverseNext(elInfo);
    }

    // === Once we have this information, we must take care of the order of the   ===
    // === atomic boundaries in the three boundary handling objects. In the end,   ===
    // === all boundaries have to be in the same order on both ranks that share    ===
    // === the boundary.                                                           ===

    std::vector<int*> sendBuffers, recvBuffers;

    // First we communicate myInt/otherIntBoundary, and then the periodic boundaries.
    MPI::Request request[max(myIntBoundary.boundary.size() + 
			     otherIntBoundary.boundary.size(),
			     periodicBoundary.boundary.size())];
    int requestCounter = 0;


    // === The owners of the boundaries send their atomic boundary order to the ===
    // === neighbouring ranks.                                                  ===

    for (RankToBoundMap::iterator rankIt = myIntBoundary.boundary.begin();
	 rankIt != myIntBoundary.boundary.end(); ++rankIt) {    
      int nSendInt = rankIt->second.size();
      int* buffer = new int[nSendInt * 2];
      for (int i = 0; i < nSendInt; i++) {
	buffer[i * 2] = (rankIt->second)[i].rankObj.elIndex;
	buffer[i * 2 + 1] = (rankIt->second)[i].rankObj.ithObj;
      }
      sendBuffers.push_back(buffer);
      
      request[requestCounter++] =
	mpiComm.Isend(buffer, nSendInt * 2, MPI_INT, rankIt->first, 0);
    }

    for (RankToBoundMap::iterator rankIt = otherIntBoundary.boundary.begin();
	 rankIt != otherIntBoundary.boundary.end(); ++rankIt) {
      int nRecvInt = rankIt->second.size();
      int *buffer = new int[nRecvInt * 2];
      recvBuffers.push_back(buffer);

      request[requestCounter++] = 
	mpiComm.Irecv(buffer, nRecvInt * 2, MPI_INT, rankIt->first, 0);
    }

    MPI::Request::Waitall(requestCounter, request);

    
    // === The information about all neighbouring boundaries has been received. So ===
    // === the rank tests whether its own atomic boundaries are in the same order. ===
    // === If not, the atomic boundaries are swapped into the correct order.       ===