#include <algorithm>

#include "ParallelDomainBase.h"
#include "ParMetisPartitioner.h"
#include "Mesh.h"
#include "Traverse.h"
#include "ElInfo.h"
#include "Element.h"
#include "MacroElement.h"
#include "PartitionElementData.h"
#include "DOFMatrix.h"
#include "DOFVector.h"
#include "SystemVector.h"
#include "VtkWriter.h"
#include "ElementDofIterator.h"
#include "ProblemStatBase.h"
#include "StandardProblemIteration.h"
#include "ElementFileWriter.h"
#include "VertexVector.h"
#include "StdMpi.h"
#include "MeshStructure.h"

#include "petscksp.h"

namespace AMDiS {

  PetscErrorCode myKSPMonitor(KSP ksp, PetscInt iter, PetscReal rnorm, void *)
  {
    if (iter % 100 == 0 && MPI::COMM_WORLD.Get_rank() == 0)
      std::cout << "  Petsc-Iteration " << iter << ": " << rnorm << std::endl;

    return 0;
  }

  inline bool cmpDofsByValue(const DegreeOfFreedom* dof1, const DegreeOfFreedom* dof2)
  {
    return (*dof1 < *dof2);
  }

  ParallelDomainBase::ParallelDomainBase(ProblemIterationInterface *iIF,
					 ProblemTimeInterface *tIF,
					 FiniteElemSpace *fe,
					 RefinementManager *refineManager)
    : iterationIF(iIF),
      timeIF(tIF),
      name(iIF->getName()),
      feSpace(fe),
      mesh(fe->getMesh()),
      refinementManager(refineManager),
      initialPartitionMesh(true),
      nRankDofs(0),
      rstart(0),
      nComponents(1),
      deserialized(false),
      lastMeshChangeIndex(0)
  {
    FUNCNAME("ParallelDomainBase::ParallelDomainBase()");

    TEST_EXIT(mesh->getNumberOfDOFAdmin() == 1)
      ("Only meshes with one DOFAdmin are supported!\n");
    TEST_EXIT(mesh->getDOFAdmin(0).getNumberOfPreDOFs(0) == 0)
      ("Wrong pre dof number for DOFAdmin!\n");

    mpiRank = MPI::COMM_WORLD.Get_rank();
    mpiSize = MPI::COMM_WORLD.Get_size();
    mpiComm = MPI::COMM_WORLD;
    partitioner = new ParMetisPartitioner(mesh, &mpiComm);
  }
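
  // Initializes the parallelization: the macro mesh is partitioned with
  // ParMETIS, a new global/local dof numbering and the interior boundary
  // information are created, macro elements outside the rank's partition are
  // removed, and finally the requested number of global refinements is done.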

  void ParallelDomainBase::initParallelization(AdaptInfo *adaptInfo)
  {
    FUNCNAME("ParallelDomainBase::initParallelization()");

    TEST_EXIT(mpiSize > 1)
      ("Parallelization does not work with only one process!\n");

    // If the problem has already been read from a file, we do not need to do anything.
    if (deserialized)
      return;

    // Test if the mesh is the macro mesh only! Partitioning of the mesh is supported
    // only for macro meshes, so it will not work yet if the mesh is already refined
    // in some way.
    testForMacroMesh();

    // Create an initial partitioning of the mesh,
    partitioner->createPartitionData();
    // set the element weights, which are all 1 at the very beginning,
    setElemWeights(adaptInfo);
    // and now partition the mesh.
    partitionMesh(adaptInfo);

#if (DEBUG != 0)
    ElementIdxToDofs elMap;
    dbgCreateElementMap(elMap);

    if (mpiRank == 0)
      writePartitioningMesh("part.vtu");
#endif

    // === Create new global and local DOF numbering. ===

    // Set of all DOFs of the rank.
    std::vector<const DegreeOfFreedom*> rankDofs;
    // Number of DOFs in the rank's partition that are owned by the rank.
    nRankDofs = 0;
    // Number of all DOFs in the macro mesh.
    int nOverallDOFs = 0;

    createLocalGlobalNumbering(rankDofs, nRankDofs, nOverallDOFs);

    // === Create interior boundary information. ===

    createInteriorBoundaryInfo();

    // === Remove all macro elements that are not part of the rank's partition. ===

    removeMacroElements();

#if (DEBUG != 0)
    dbgTestElementMap(elMap);
    dbgTestInteriorBoundary();
#endif

    // === Reset all DOFAdmins of the mesh. ===

    updateDofAdmins();

    // === Create periodic dof mapping, if there are periodic boundaries. ===

    createPeriodicMap();

    // === Global refinements. ===

    int globalRefinement = 0;
    GET_PARAMETER(0, mesh->getName() + "->global refinements", "%d", &globalRefinement);

    if (globalRefinement > 0) {
      refinementManager->globalRefine(mesh, globalRefinement);

#if (DEBUG != 0)
      elMap.clear();
      dbgCreateElementMap(elMap);
#endif

      updateLocalGlobalNumbering(nRankDofs, nOverallDOFs);

      updateDofAdmins();

#if (DEBUG != 0)
      dbgTestElementMap(elMap);
#endif

      // === Update periodic mapping, if there are periodic boundaries. ===
      createPeriodicMap();
    }

    lastMeshChangeIndex = mesh->getChangeIndex();

#if (DEBUG != 0)
    dbgTestCommonDofs(true);
#endif

    nRankRows = nRankDofs * nComponents;
    nOverallRows = nOverallDOFs * nComponents;
  }

  void ParallelDomainBase::exitParallelization(AdaptInfo *adaptInfo)
  {}

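  // Resets all DOFAdmins of the mesh to be consistent with the rank's local
  // dof numbering: every dof contained in mapLocalGlobalDOFs is marked as
  // used, all remaining dofs as free, and the used sizes are set accordingly.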
  
  void ParallelDomainBase::updateDofAdmins()
  {
    int nAdmins = mesh->getNumberOfDOFAdmin();
    for (int i = 0; i < nAdmins; i++) {
      DOFAdmin& admin = const_cast<DOFAdmin&>(mesh->getDOFAdmin(i));
      
      for (int j = 0; j < admin.getSize(); j++)
	admin.setDOFFree(j, true);
      for (int j = 0; j < static_cast<int>(mapLocalGlobalDOFs.size()); j++)
 	admin.setDOFFree(j, false);

      admin.setUsedSize(mapLocalGlobalDOFs.size());
      admin.setUsedCount(mapLocalGlobalDOFs.size());
      admin.setFirstHole(mapLocalGlobalDOFs.size());
    }
  }

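  // Tests whether the mesh consists of macro elements only, i.e., it has not
  // been refined before, and whether there are at least as many macro
  // elements as MPI processes.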
  void ParallelDomainBase::testForMacroMesh()
  {
    FUNCNAME("ParallelDomainBase::testForMacroMesh()");

    int nMacroElements = 0;

    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, -1, Mesh::CALL_LEAF_EL);
    while (elInfo) {
      TEST_EXIT(elInfo->getLevel() == 0)
	("Mesh is already refined! This does not work with parallelization!\n");

      nMacroElements++;

      elInfo = stack.traverseNext(elInfo);
    }

    TEST_EXIT(nMacroElements >= mpiSize)
      ("The mesh has fewer macro elements than the number of MPI processes!\n");
  }

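  // Copies the values of a DOFMatrix into the global petsc matrix. A global
  // dof index d is mapped to the petsc row index d * dispMult + dispAddRow
  // and to the petsc column index d * dispMult + dispAddCol; fillPetscMatrix
  // calls this with dispMult = nComponents and the component indices as
  // offsets. Values belonging to periodic dofs are scaled by 1/n and
  // duplicated to all n corresponding periodic rows/columns.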
  void ParallelDomainBase::setDofMatrix(DOFMatrix* mat, int dispMult, 
					int dispAddRow, int dispAddCol)
  {
    FUNCNAME("ParallelDomainBase::setDofMatrix()");

    TEST_EXIT(mat)("No DOFMatrix!\n");

    using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits = mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    traits::col<Matrix>::type col(mat->getBaseMatrix());
    traits::const_value<Matrix>::type value(mat->getBaseMatrix());

    typedef traits::range_generator<row, Matrix>::type cursor_type;
    typedef traits::range_generator<nz, cursor_type>::type icursor_type;

    std::vector<int> cols;
    std::vector<double> values;
    cols.reserve(300);
    values.reserve(300);

    // === Traverse all rows of the dof matrix and insert the values row-wise ===
    // === into the petsc matrix.                                             ===

    for (cursor_type cursor = begin<row>(mat->getBaseMatrix()), 
	   cend = end<row>(mat->getBaseMatrix()); cursor != cend; ++cursor) {

      cols.clear();
      values.clear();

      // Global index of the current row dof.
      int globalRowDof = mapLocalGlobalDOFs[*cursor];
      // Test if the current row dof is a periodic dof.
      bool periodicRow = (periodicDof.count(globalRowDof) > 0);

      // === Traverse all non-zero entries of the row and fill vector cols ===
      // === with the column indices of all row entries and vector values  ===
      // === with the corresponding values.                                ===

      for (icursor_type icursor = begin<nz>(cursor), icend = end<nz>(cursor); 
	   icursor != icend; ++icursor) {

	// Set only non-zero values.
	if (value(*icursor) != 0.0) {
	  // Global index of the current column dof.
	  int globalColDof = mapLocalGlobalDOFs[col(*icursor)];
	  // Calculate the exact position of the column index in the petsc matrix.
	  int colIndex = globalColDof * dispMult + dispAddCol;

	  // If the current row is not periodic, but the current dof index is periodic,
	  // we have to duplicate the value to the other corresponding periodic columns.
	  if (periodicRow == false && periodicDof.count(globalColDof) > 0) {
	    // The value is assigned to n matrix entries; therefore, every entry 
	    // gets only 1/n of the original value.
	    double scalFactor = 1.0 / (periodicDof[globalColDof].size() + 1.0);

	    // Insert the original entry.
	    cols.push_back(colIndex);
	    values.push_back(value(*icursor) * scalFactor);

	    // Insert the periodic entries.
	    for (std::set<DegreeOfFreedom>::iterator it = 
		   periodicDof[globalColDof].begin();
		 it != periodicDof[globalColDof].end(); ++it) {
	      cols.push_back(*it * dispMult + dispAddCol);
	      values.push_back(value(*icursor) * scalFactor);
	    }
	  } else {
	    // Neither row nor column dof index is periodic; simply add the entry.
	    cols.push_back(colIndex);
	    values.push_back(value(*icursor));
	  }
	}
      }

      // === Up to now we have assembled one row. Now the row must be sent to ===
      // === the corresponding rows of the petsc matrix.                      ===

      // Calculate petsc row index.
      int rowIndex = globalRowDof * dispMult + dispAddRow;

      if (periodicRow) {
	// The row dof is periodic, so send the row to all corresponding rows.
	double scalFactor = 1.0 / (periodicDof[globalRowDof].size() + 1.0);

	int diagIndex = -1;
	for (int i = 0; i < static_cast<int>(values.size()); i++) {
	  // Scale only the non-diagonal values in the row. For the diagonal test
	  // we compare the global dof indices of the dof matrix (not of the petsc
	  // matrix!).
	  if ((cols[i] - dispAddCol) / dispMult != globalRowDof)
	    values[i] *= scalFactor;
	  else
	    diagIndex = i;
	}

	// Send the main row to the petsc matrix.
	MatSetValues(petscMatrix, 1, &rowIndex, cols.size(), 
		     &(cols[0]), &(values[0]), ADD_VALUES);	

	// Set the diagonal element to zero, i.e., the diagonal element of the
	// current row is not sent to the periodic row indices.
	if (diagIndex != -1)
	  values[diagIndex] = 0.0;

	// Send the row to all periodic row indices.
	for (std::set<DegreeOfFreedom>::iterator it = periodicDof[globalRowDof].begin();
	     it != periodicDof[globalRowDof].end(); ++it) {
	  int perRowIndex = *it * dispMult + dispAddRow;
	  MatSetValues(petscMatrix, 1, &perRowIndex, cols.size(), 
		       &(cols[0]), &(values[0]), ADD_VALUES);
	}

      } else {
	// The row dof is not periodic; simply send the row to the petsc matrix.
	MatSetValues(petscMatrix, 1, &rowIndex, cols.size(), 
		     &(cols[0]), &(values[0]), ADD_VALUES);
      }
    }
  }
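  // Copies the values of a DOFVector into a petsc vector. A global dof index
  // d is mapped to the petsc index d * dispMult + dispAdd; the value of a
  // periodic dof is distributed equally over the dof and all its periodic
  // copies.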
  void ParallelDomainBase::setDofVector(Vec& petscVec, DOFVector<double>* vec, 
					int dispMult, int dispAdd)
  {
    // Traverse all used dofs in the dof vector.
    DOFVector<double>::Iterator dofIt(vec, USED_DOFS);
    for (dofIt.reset(); !dofIt.end(); ++dofIt) {
      // Calculate global row index of the dof.
      int globalRow = mapLocalGlobalDOFs[dofIt.getDOFIndex()];
      // Calculate petsc index of the row dof.
      int index = globalRow * dispMult + dispAdd;

      if (periodicDof.count(globalRow) > 0) {
	// The dof index is periodic, so distribute the value to all
	// corresponding dof entries.
	double value = *dofIt / (periodicDof[globalRow].size() + 1.0);
	VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);

	for (std::set<DegreeOfFreedom>::iterator it = periodicDof[globalRow].begin();
	     it != periodicDof[globalRow].end(); ++it) {
	  index = *it * dispMult + dispAdd;
	  VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);
	}
      } else {
	// The dof index is not periodic.
	double value = *dofIt;
	VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);
      }
    }
  }

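  // Creates the parallel petsc matrix and vectors for the global system. The
  // numbers of non-zero entries per row in the diagonal block (d_nnz) and the
  // off-diagonal block (o_nnz) are counted first, so the matrix can be
  // preallocated exactly; entries of rows not owned by this rank are sent to
  // the owning rank before the counting is finished.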
  void ParallelDomainBase::fillPetscMatrix(Matrix<DOFMatrix*> *mat, SystemVector *vec)
  {
    FUNCNAME("ParallelDomainBase::fillPetscMatrix()");

    clock_t first = clock();

    VecCreate(PETSC_COMM_WORLD, &petscRhsVec);
    VecSetSizes(petscRhsVec, nRankRows, nOverallRows);
    VecSetType(petscRhsVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscSolVec);
    VecSetSizes(petscSolVec, nRankRows, nOverallRows);
    VecSetType(petscSolVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscTmpVec);
    VecSetSizes(petscTmpVec, nRankRows, nOverallRows);
    VecSetType(petscTmpVec, VECMPI);

    using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits = mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    int d_nnz[nRankRows];
    int o_nnz[nRankRows];

    std::map<int, std::vector< std::pair<int, int> > > sendMatrixEntry;

    for (int i = 0; i < nRankRows; i++) {
      d_nnz[i] = 0;
      o_nnz[i] = 0;
    }

    for (int i = 0; i < nComponents; i++) {
      for (int j = 0; j < nComponents; j++) {
	if ((*mat)[i][j]) {
	  Matrix bmat = (*mat)[i][j]->getBaseMatrix();

	  traits::col<Matrix>::type col(bmat);
	  traits::const_value<Matrix>::type value(bmat);

	  typedef traits::range_generator<row, Matrix>::type cursor_type;
	  typedef traits::range_generator<nz, cursor_type>::type icursor_type;

	  for (cursor_type cursor = begin<row>(bmat), 
		 cend = end<row>(bmat); cursor != cend; ++cursor) {

	    int r = mapLocalGlobalDOFs[*cursor] * nComponents + i;

	    if (isRankDof[*cursor]) {
	      r -= rstart * nComponents;

#if (DEBUG != 0)    
	      if (r < 0 || r >= nRankRows) {
		std::cout << "ERROR in rank: " << mpiRank << std::endl;
		std::cout << "  Wrong r = " << r << " " << *cursor << " " 
			  << mapLocalGlobalDOFs[*cursor] << " " 
			  << nRankRows << std::endl;
		ERROR_EXIT("Should not happen!\n");
	      }
#endif

	      for (icursor_type icursor = begin<nz>(cursor), 
		     icend = end<nz>(cursor); icursor != icend; ++icursor) {
		if (value(*icursor) != 0.0) {
		  int c = mapLocalGlobalDOFs[col(*icursor)] * nComponents + j;

		  if (c >= rstart * nComponents && 
		      c < rstart * nComponents + nRankRows)
		    d_nnz[r]++;
		  else
		    o_nnz[r]++;
		}    
	      }
	    } else {
	      int sendToRank = -1;

	      for (RankToDofContainer::iterator it = recvDofs.begin();
		   it != recvDofs.end(); ++it) {
		for (DofContainer::iterator dofIt = it->second.begin();
		     dofIt != it->second.end(); ++dofIt) {
		  if (**dofIt == *cursor) {
		    sendToRank = it->first;
		    break;
		  }
		}

		if (sendToRank != -1)
		  break;
	      }

	      TEST_EXIT_DBG(sendToRank != -1)("Should not happen!\n");

	      for (icursor_type icursor = begin<nz>(cursor), 
		     icend = end<nz>(cursor); icursor != icend; ++icursor) {
		if (value(*icursor) != 0.0) {
		  int c = mapLocalGlobalDOFs[col(*icursor)] * nComponents + j;

		  sendMatrixEntry[sendToRank].push_back(std::make_pair(r, c));
		}
	      }
	    }
	  }
	}
      }
    }

    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;

    std::vector<int*> sendBuffers;
    sendBuffers.reserve(recvDofs.size());

    for (RankToDofContainer::iterator it = recvDofs.begin(); 
	 it != recvDofs.end(); ++it) {
      int nSend = sendMatrixEntry[it->first].size();

      request[requestCounter++] = mpiComm.Isend(&nSend, 1, MPI_INT, it->first, 0);

      if (nSend > 0) {
	int *sendbuf = new int[nSend * 2];
	for (int i = 0; i < nSend; i++) {
	  sendbuf[i * 2] = sendMatrixEntry[it->first][i].first;
	  sendbuf[i * 2 + 1] = sendMatrixEntry[it->first][i].second;
	}
	sendBuffers.push_back(sendbuf);
      } else {
	sendBuffers.push_back(NULL);
      }
    }

    std::vector<int> recvSize;
    recvSize.resize(sendDofs.size());

    int i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it)
      request[requestCounter++] = 
	mpiComm.Irecv(&(recvSize[i++]), 1, MPI_INT, it->first, 0);

    MPI::Request::Waitall(requestCounter, request);

    requestCounter = 0;

    i = 0;
    for (RankToDofContainer::iterator it = recvDofs.begin(); 
	 it != recvDofs.end(); ++it) {
      int nSend = sendMatrixEntry[it->first].size();

      if (nSend > 0)
	request[requestCounter++] = 
	  mpiComm.Isend(sendBuffers[i], nSend * 2, MPI_INT, it->first, 0);

      i++;
    }

    std::vector<int*> recvBuffers;
    recvBuffers.reserve(sendDofs.size());

    i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it) {
      if (recvSize[i] > 0) {
	int *recvbuf = new int[recvSize[i] * 2];
	recvBuffers.push_back(recvbuf);

	request[requestCounter++] =
	  mpiComm.Irecv(recvbuf, recvSize[i] * 2, MPI_INT, it->first, 0);
      } else {
	recvBuffers.push_back(NULL);
      }

      i++;
    }

    MPI::Request::Waitall(requestCounter, request);

    for (int j = 0; j < static_cast<int>(sendBuffers.size()); j++)
      if (sendBuffers[j])
	delete [] sendBuffers[j];

    i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it) {
      if (recvSize[i] > 0) {
	for (int j = 0; j < recvSize[i]; j++) {
	  int r = recvBuffers[i][j * 2];
	  int c = recvBuffers[i][j * 2 + 1];

	  r -= rstart * nComponents;

	  TEST_EXIT_DBG(r >= 0 && r < nRankRows)("Should not happen!\n");

	  if (c < rstart * nComponents || c >= rstart * nComponents + nRankRows)
	    o_nnz[r]++;
	  else
	    d_nnz[r]++;
	}

	delete [] recvBuffers[i];
      }

      i++;
    }

    MatCreateMPIAIJ(PETSC_COMM_WORLD, nRankRows, nRankRows, nOverallRows, nOverallRows,
		    0, d_nnz, 0, o_nnz, &petscMatrix);

#if (DEBUG != 0)
    int a, b;
    MatGetOwnershipRange(petscMatrix, &a, &b);
    TEST_EXIT(a == rstart * nComponents)("Wrong matrix ownership range!\n");
    TEST_EXIT(b == rstart * nComponents + nRankRows)("Wrong matrix ownership range!\n");
#endif

    using mtl::tag::major; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits = mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    for (int i = 0; i < nComponents; i++)
      for (int j = 0; j < nComponents; j++)
	if ((*mat)[i][j])
	  setDofMatrix((*mat)[i][j], nComponents, i, j);

    MatAssemblyBegin(petscMatrix, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(petscMatrix, MAT_FINAL_ASSEMBLY);

    for (int i = 0; i < nComponents; i++)
      setDofVector(petscRhsVec, vec->getDOFVector(i), nComponents, i);

    VecAssemblyBegin(petscRhsVec);
    VecAssemblyEnd(petscRhsVec);

    INFO(info, 8)("Fill petsc matrix needed %.5f seconds\n", TIME_USED(first, clock()));
  }


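  // Solves the assembled global system with petsc's KSP solver (BiCGStab with
  // an absolute tolerance of 1e-8, adjustable via petsc runtime options),
  // copies the solution back into the dof vectors, synchronizes the dofs at
  // the interior boundaries, and destroys the petsc objects.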
  void ParallelDomainBase::solvePetscMatrix(SystemVector &vec)
  {
    FUNCNAME("ParallelDomainBase::solvePetscMatrix()");

#if 0
    // Set the old solution to be the initial guess for the petsc solver.
    for (int i = 0; i < nComponents; i++)
      setDofVector(petscSolVec, vec.getDOFVector(i), nComponents, i);

    VecAssemblyBegin(petscSolVec);
    VecAssemblyEnd(petscSolVec);
#endif

    // === Init Petsc solver. ===

    KSP solver;
    KSPCreate(PETSC_COMM_WORLD, &solver);
    KSPSetOperators(solver, petscMatrix, petscMatrix, SAME_NONZERO_PATTERN); 
    KSPSetTolerances(solver, 0.0, 1e-8, PETSC_DEFAULT, PETSC_DEFAULT);
    KSPSetType(solver, KSPBCGS);
    KSPMonitorSet(solver, myKSPMonitor, PETSC_NULL, 0);
    KSPSetFromOptions(solver);
    // Do not delete the solution vector, use it as the initial guess.
    //    KSPSetInitialGuessNonzero(solver, PETSC_TRUE);

    // === Run Petsc. ===

    KSPSolve(solver, petscRhsVec, petscSolVec);

    // === Transfer values from Petsc's solution vector to the dof vectors. ===

    PetscScalar *vecPointer;
    VecGetArray(petscSolVec, &vecPointer);

    for (int i = 0; i < nComponents; i++) {
      DOFVector<double> *dofvec = vec.getDOFVector(i);
      for (int j = 0; j < nRankDofs; j++)
	(*dofvec)[mapLocalToDofIndex[j]] = vecPointer[j * nComponents + i];
    }

    VecRestoreArray(petscSolVec, &vecPointer);

    // === Synchronize dofs at common dofs, i.e., dofs that correspond to ===
    // === more than one partition.                                       ===
    synchVectors(vec);

    // === Print information about the solution process. ===

    int iterations = 0;
    KSPGetIterationNumber(solver, &iterations);
    MSG("  Number of iterations: %d\n", iterations);

    double norm = 0.0;
    MatMult(petscMatrix, petscSolVec, petscTmpVec);
    VecAXPY(petscTmpVec, -1.0, petscRhsVec);
    VecNorm(petscTmpVec, NORM_2, &norm);
    MSG("  Residual norm: %e\n", norm);

    // === Destroy Petsc's variables. ===

    MatDestroy(petscMatrix);
    VecDestroy(petscRhsVec);
    VecDestroy(petscSolVec);
    VecDestroy(petscTmpVec);
    KSPDestroy(solver);
  }
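
  // Exchanges the values of all dofs that live on interior boundaries: each
  // rank sends the values of the boundary dofs it owns (sendDofs) to the
  // neighbouring ranks and overwrites its copies of the dofs it does not own
  // (recvDofs) with the received values.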
  
  void ParallelDomainBase::synchVectors(SystemVector &vec)
  {
#if 0
    StdMpi<std::vector<double>, std::vector<double> > stdMpi(mpiComm);
    stdMpi.prepareCommunication(false);

    for (RankToDofContainer::iterator sendIt = sendDofs.begin();
	 sendIt != sendDofs.end(); ++sendIt, i++) {
      std::vector<double> dofs;
      int nSendDOFs = sendIt->second.size();
      dofs.reserve(nComponents * sendIt->second.size());

      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
	for (int k = 0; k < nSendDOFs; k++)
	  dofs.push_back((*dofvec)[*((sendIt->second)[k])]);
      }

      stdMpi.send(sendIt->first, dofs);
    }

    stdMpi.startCommunication<int>();
#endif

#if 1
    std::vector<double*> sendBuffers(sendDofs.size());
    std::vector<double*> recvBuffers(recvDofs.size());

    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;

    int i = 0;
    for (RankToDofContainer::iterator sendIt = sendDofs.begin();
	 sendIt != sendDofs.end(); ++sendIt, i++) {
      int nSendDOFs = sendIt->second.size();
      sendBuffers[i] = new double[nSendDOFs * nComponents];

      int counter = 0;
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
	for (int k = 0; k < nSendDOFs; k++)
	  sendBuffers[i][counter++] = (*dofvec)[*((sendIt->second)[k])];
      }

      request[requestCounter++] = mpiComm.Isend(sendBuffers[i], nSendDOFs * nComponents,
						MPI_DOUBLE, sendIt->first, 0);
    }

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size() * nComponents;
      recvBuffers[i] = new double[nRecvDOFs];

      request[requestCounter++] =
	mpiComm.Irecv(recvBuffers[i], nRecvDOFs, MPI_DOUBLE, recvIt->first, 0);
    }

    MPI::Request::Waitall(requestCounter, request);

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size();

      int counter = 0;
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
	for (int k = 0; k < nRecvDOFs; k++)
	  (*dofvec)[*(recvIt->second)[k]] = recvBuffers[i][counter++];
      }

      delete [] recvBuffers[i];
    }

    for (int i = 0; i < static_cast<int>(sendBuffers.size()); i++)
      delete [] sendBuffers[i];
#endif
  }

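  // Checks whether the mesh has been changed since the last call. If so, the
  // mesh structure codes of all boundary elements are exchanged with the
  // neighbouring ranks to test whether the meshes still fit together along
  // the interior boundaries; currently the program stops if they do not.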

  void ParallelDomainBase::checkMeshChange()
  {
    // === If the mesh has not been changed, return. ===

    if (mesh->getChangeIndex() == lastMeshChangeIndex)
      return;

    // === Create mesh structure codes for all boundary elements of the rank. ===

    typedef std::vector<MeshStructure> MeshCodeVec;
    std::map<int, MeshCodeVec > sendCodes;

    for (RankToBoundMap::iterator it = myIntBoundary.boundary.begin();
	 it != myIntBoundary.boundary.end(); ++it) {    

      for (std::vector<AtomicBoundary>::iterator boundIt = it->second.begin();
	   boundIt != it->second.end(); ++boundIt) {

	MeshStructure elCode;
	elCode.init(boundIt->rankObj.el, boundIt->rankObj.ithObj, 
		    boundIt->rankObj.elType);
	sendCodes[it->first].push_back(elCode);
      }
    }

    StdMpi<MeshCodeVec, MeshCodeVec> stdMpi(mpiComm);
    stdMpi.prepareCommunication(true);
    stdMpi.send(sendCodes);

    for (RankToBoundMap::iterator it = otherIntBoundary.boundary.begin();
	 it != otherIntBoundary.boundary.end(); ++it)
      stdMpi.recv(it->first);

    stdMpi.startCommunication<unsigned long int>();

    // === Compare the received mesh structure codes. ===

    bool meshFitTogether = true;

    for (RankToBoundMap::iterator it = otherIntBoundary.boundary.begin();
	 it != otherIntBoundary.boundary.end(); ++it) {

      MeshCodeVec &recvCodes = stdMpi.getRecvData()[it->first];
      int i = 0;

      for (std::vector<AtomicBoundary>::iterator boundIt = it->second.begin();
	   boundIt != it->second.end(); ++boundIt) {

	MeshStructure elCode;
	elCode.init(boundIt->rankObj.el, boundIt->rankObj.ithObj, 
		    boundIt->rankObj.elType);

	if (elCode.getCode() != recvCodes[i].getCode())
	  meshFitTogether = false;

	i++;
      }
    }

    if (!meshFitTogether) {
      std::cout << "MESH HAS BEEN CHANGED!" << std::endl;
      exit(0);    
    }

    lastMeshChangeIndex = mesh->getChangeIndex();
  }

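  // The following four methods (de)serialize DofContainers and
  // RankToDofContainers. Dofs are stored by their indices; on deserialization
  // the given dofMap translates the indices back to dof pointers.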
  
  void ParallelDomainBase::serialize(std::ostream &out, DofContainer &data)
  {
    int vecSize = data.size();
    SerUtil::serialize(out, vecSize);
    for (int i = 0; i < vecSize; i++) {
      int dofIndex = (*data[i]);
      SerUtil::serialize(out, dofIndex);
    }
  }


  void ParallelDomainBase::deserialize(std::istream &in, DofContainer &data,
				       std::map<int, const DegreeOfFreedom*> &dofMap)
  {
    FUNCNAME("ParallelDomainBase::deserialize()");

    int vecSize = 0;
    SerUtil::deserialize(in, vecSize);
    data.resize(vecSize);
    for (int i = 0; i < vecSize; i++) {
      int dofIndex = 0;
      SerUtil::deserialize(in, dofIndex);

      TEST_EXIT_DBG(dofMap.count(dofIndex) != 0)
	("Dof index could not be deserialized correctly!\n");

      data[i] = dofMap[dofIndex];
    }
  }


  void ParallelDomainBase::serialize(std::ostream &out, RankToDofContainer &data)
  {
    int mapSize = data.size();
    SerUtil::serialize(out, mapSize);
    for (RankToDofContainer::iterator it = data.begin(); it != data.end(); ++it) {
      int rank = it->first;
      SerUtil::serialize(out, rank);
      serialize(out, it->second);
    }
  }


  void ParallelDomainBase::deserialize(std::istream &in, RankToDofContainer &data,
				       std::map<int, const DegreeOfFreedom*> &dofMap)
  {
    int mapSize = 0;
    SerUtil::deserialize(in, mapSize);
    for (int i = 0; i < mapSize; i++) {
      int rank = 0;
      SerUtil::deserialize(in, rank);
      deserialize(in, data[rank], dofMap);      
    }
  }

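  // Computes for every macro element of the rank's partition a weight, given
  // by the number of its leaf elements, and returns the sum of all local
  // weights.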
  double ParallelDomainBase::setElemWeights(AdaptInfo *adaptInfo) 
  {
    double localWeightSum = 0.0;
    int elNum = -1;

    elemWeights.clear();

    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, -1,
					 Mesh::CALL_EVERY_EL_PREORDER);
    while (elInfo) {
      Element *element = elInfo->getElement();

      // get partition data
      PartitionElementData *partitionData = dynamic_cast<PartitionElementData*>
	(element->getElementData(PARTITION_ED));

      if (partitionData && partitionData->getPartitionStatus() == IN) {
	if (partitionData->getLevel() == 0)
	  elNum = element->getIndex();
	
	TEST_EXIT_DBG(elNum != -1)("invalid element number\n");
	if (element->isLeaf()) {
	  elemWeights[elNum] += 1.0;
	  localWeightSum += 1.0;
	}
      }

      elInfo = stack.traverseNext(elInfo);
    }

    return localWeightSum;
  }

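  // Runs the ParMETIS partitioner: the first call creates an initial
  // partitioning, later calls adaptively repartition the mesh based on the
  // current element weights. The previous partitioning is kept in
  // oldPartitionVec.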
  void ParallelDomainBase::partitionMesh(AdaptInfo *adaptInfo)
  {
    if (initialPartitionMesh) {
      initialPartitionMesh = false;
      partitioner->fillCoarsePartitionVec(&oldPartitionVec);
      partitioner->partition(&elemWeights, INITIAL);
    } else {
      oldPartitionVec = partitionVec;
      partitioner->partition(&elemWeights, ADAPTIVE_REPART, 100.0 /*0.000001*/);
    }    

    partitioner->fillCoarsePartitionVec(&partitionVec);
  }

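  // Determines all interior boundaries of the rank's partition, i.e., all
  // edges (2d) or faces (3d) at which an element of this rank touches an
  // element of another rank, and brings the boundaries into the same order on
  // both ranks that share them.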
  void ParallelDomainBase::createInteriorBoundaryInfo()
  {
    FUNCNAME("ParallelDomainBase::createInteriorBoundaryInfo()");

    int nNeighbours = mesh->getGeo(NEIGH);
    int dim = mesh->getDim();
    GeoIndex subObj = CENTER;
    switch (dim) {
    case 2:
      subObj = EDGE;
      break;
    case 3:
      subObj = FACE;
      break;
    default:
      ERROR_EXIT("What is this?\n");
    }     

    // === First, traverse the mesh and search for all elements that have a ===
    // === boundary with an element within another partition.               ===

    TraverseStack stack;
    ElInfo *elInfo = 
      stack.traverseFirst(mesh, -1, 
			  Mesh::CALL_LEAF_EL | Mesh::FILL_NEIGH | Mesh::FILL_BOUND);
    while (elInfo) {
      Element *element = elInfo->getElement();

      PartitionElementData *partitionData = 
	dynamic_cast<PartitionElementData*>(element->getElementData(PARTITION_ED));   

      // Check if the element is within the rank's partition.
      if (partitionData->getPartitionStatus() == IN) {
	for (int i = 0; i < nNeighbours; i++) {
	  if (!elInfo->getNeighbour(i))
	    continue;

	  PartitionElementData *neighbourPartitionData =
	    dynamic_cast<PartitionElementData*>(elInfo->getNeighbour(i)->
						getElementData(PARTITION_ED));

	  if (neighbourPartitionData->getPartitionStatus() == OUT) {
	    // We have found an element that is in the rank's partition, but has
	    // a neighbour outside of the rank's partition.

	    // === Create information about the boundary between the two elements. ===

	    AtomicBoundary bound;
	    bound.rankObj.el = element;
	    bound.rankObj.elIndex = element->getIndex();
	    bound.rankObj.elType = elInfo->getType();
	    bound.rankObj.subObj = subObj;
	    bound.rankObj.ithObj = i;
	    // Do not set a pointer to the neighbour element, because it will be
	    // deleted from the mesh after partitioning and the pointer would
	    // become invalid.
	    bound.neighObj.el = NULL;
	    bound.neighObj.elIndex = elInfo->getNeighbour(i)->getIndex();
	    bound.neighObj.elType = -1;
	    bound.neighObj.subObj = subObj;
	    bound.neighObj.ithObj = elInfo->getSideOfNeighbour(i);

	    if (dim == 2) {
	      // i == 2  =>  getSideOfNeighbour(i) == 2
	      TEST_EXIT_DBG(i != 2 || elInfo->getSideOfNeighbour(i) == 2)
		("Should not happen!\n");
	    }

	    // Get the rank number of the neighbouring element.
	    int otherElementRank = partitionVec[elInfo->getNeighbour(i)->getIndex()];

	    // === Add the boundary information object to the corresponding overall ===
	    // === boundary. There are three possibilities: if the boundary is a    ===
	    // === periodic boundary, just add it to \ref periodicBoundary. Here it ===
	    // === does not matter which rank is responsible for this boundary.     ===
	    // === Otherwise, the boundary is added either to \ref myIntBoundary or ===
	    // === to \ref otherIntBoundary. It depends on which rank is respon-    ===
	    // === sible for this boundary.                                         ===

	    if (BoundaryManager::isBoundaryPeriodic(elInfo->getBoundary(subObj, i))) {	      
	      // We have found an element that is at an interior, periodic boundary.
	      AtomicBoundary& b = periodicBoundary.getNewAtomic(otherElementRank);
	      b = bound;
	    } else {
	      // We have found an element that is at an interior, non-periodic boundary.
	      AtomicBoundary& b = ((mpiRank > otherElementRank) ?
				   myIntBoundary.getNewAtomic(otherElementRank) :
				   otherIntBoundary.getNewAtomic(otherElementRank));
	      b = bound;	      
	    }
	  }
	}
      }

      elInfo = stack.traverseNext(elInfo);
    }

    // === Once we have this information, we must take care of the order of the ===
    // === atomic bounds in the three boundary handling objects. In the end, all ===
    // === the boundaries have to be in the same order on both ranks that share  ===
    // === the boundary.                                                         ===

    std::vector<int*> sendBuffers, recvBuffers;

    // First we communicate myInt/otherIntBoundary, and then the periodic boundaries.
    MPI::Request request[max(myIntBoundary.boundary.size() + 
			     otherIntBoundary.boundary.size(),
			     periodicBoundary.boundary.size())];
    int requestCounter = 0;

    // === The owners of the boundaries send their atomic boundary order to the ===
    // === neighbouring ranks.                                                  ===

    for (RankToBoundMap::iterator rankIt = myIntBoundary.boundary.begin();
	 rankIt != myIntBoundary.boundary.end(); ++rankIt) {    
      int nSendInt = rankIt->second.size();
      int* buffer = new int[nSendInt * 2];
      for (int i = 0; i < nSendInt; i++) {
	buffer[i * 2] = (rankIt->second)[i].rankObj.elIndex;
	buffer[i * 2 + 1] = (rankIt->second)[i].rankObj.ithObj;
      }
      sendBuffers.push_back(buffer);

      request[requestCounter++] =
	mpiComm.Isend(buffer, nSendInt * 2, MPI_INT, rankIt->first, 0);
    }

    for (RankToBoundMap::iterator rankIt = otherIntBoundary.boundary.begin();
	 rankIt != otherIntBoundary.boundary.end(); ++rankIt) {
      int nRecvInt = rankIt->second.size();
      int *buffer = new int[nRecvInt * 2];
      recvBuffers.push_back(buffer);

      request[requestCounter++] = 
	mpiComm.Irecv(buffer, nRecvInt * 2, MPI_INT, rankIt->first, 0);
    }

    MPI::Request::Waitall(requestCounter, request);

    // === The information about all neighbouring boundaries has been received.  ===
    // === So the rank tests if its own atomic boundaries are in the same order. ===
    // === If not, the atomic boundaries are swapped to the correct order.       ===

    int i = 0;
    for (RankToBoundMap::iterator rankIt = otherIntBoundary.boundary.begin();
	 rankIt != otherIntBoundary.boundary.end(); ++rankIt) {

      // === We have received from rank "rankIt->first" the ordered list of element ===
      // === indices. We now have to sort the corresponding list in this rank to    ===
      // === get the same order.                                                    ===

      for (int j = 0; j < static_cast<int>(rankIt->second.size()); j++) {
	// If the expected object is not in place, search for it.
	if ((rankIt->second)[j].neighObj.elIndex != recvBuffers[i][j * 2] ||
	    (rankIt->second)[j].neighObj.ithObj != recvBuffers[i][j * 2 + 1]) {
	  int k = j + 1;
	  for (; k < static_cast<int>(rankIt->second.size()); k++)
	    if ((rankIt->second)[k].neighObj.elIndex == recvBuffers[i][j * 2] && 
		(rankIt->second)[k].neighObj.ithObj == recvBuffers[i][j * 2 + 1])
	      break;

	  // The element must always be found, because the list is just in another order.
	  TEST_EXIT_DBG(k < static_cast<int>(rankIt->second.size()))
	    ("Should never happen!\n");

	  // Swap the current with the found element.
	  AtomicBoundary tmpBound = (rankIt->second)[k];
	  (rankIt->second)[k] = (rankIt->second)[j];
	  (rankIt->second)[j] = tmpBound;	
	}
      }