#include <algorithm>

#include "ParallelDomainBase.h"
#include "ParMetisPartitioner.h"
#include "Mesh.h"
#include "Traverse.h"
#include "ElInfo.h"
#include "Element.h"
#include "MacroElement.h"
#include "PartitionElementData.h"
#include "DOFMatrix.h"
#include "DOFVector.h"
#include "SystemVector.h"
#include "VtkWriter.h"
#include "ElementDofIterator.h"
#include "ProblemStatBase.h"
#include "StandardProblemIteration.h"
#include "ElementFileWriter.h"
#include "VertexVector.h"
#include "StdMpi.h"
#include "MeshStructure.h"

#include "petscksp.h"

namespace AMDiS {

  PetscErrorCode myKSPMonitor(KSP ksp, PetscInt iter, PetscReal rnorm, void *)
  {    
    if (iter % 100 == 0 && MPI::COMM_WORLD.Get_rank() == 0)
      std::cout << "  Petsc-Iteration " << iter << ": " << rnorm << std::endl;

    return 0;
  }

  inline bool cmpDofsByValue(const DegreeOfFreedom* dof1, const DegreeOfFreedom* dof2)
  {
    return (*dof1 < *dof2);
  }

  ParallelDomainBase::ParallelDomainBase(ProblemIterationInterface *iIF,
					 ProblemTimeInterface *tIF,
					 FiniteElemSpace *fe,
					 RefinementManager *refineManager)
    : iterationIF(iIF),
      timeIF(tIF),
      name(iIF->getName()),
      feSpace(fe),
      mesh(fe->getMesh()),
      refinementManager(refineManager),
      initialPartitionMesh(true),
      nRankDofs(0),
      rstart(0),
      nComponents(1),
      deserialized(false),
      lastMeshChangeIndex(0)
  {
    FUNCNAME("ParallelDomainBase::ParallelDomainBase()");

    TEST_EXIT(mesh->getNumberOfDOFAdmin() == 1)
      ("Only meshes with one DOFAdmin are supported!\n");
    TEST_EXIT(mesh->getDOFAdmin(0).getNumberOfPreDOFs(0) == 0)
      ("Wrong pre dof number for DOFAdmin!\n");

    mpiRank = MPI::COMM_WORLD.Get_rank();
    mpiSize = MPI::COMM_WORLD.Get_size();
    mpiComm = MPI::COMM_WORLD;
    partitioner = new ParMetisPartitioner(mesh, &mpiComm);
  }

  void ParallelDomainBase::initParallelization(AdaptInfo *adaptInfo)
  {
    FUNCNAME("ParallelDomainBase::initParallelization()");

    TEST_EXIT(mpiSize > 1)
      ("Parallelization does not work with only one process!\n");

    // If the problem has already been read from a file, we do not need to do anything.
    if (deserialized)
      return;

    // Test whether the mesh is the macro mesh only. Partitioning of the mesh is
    // supported only for macro meshes, so it will not work yet if the mesh is
    // already refined in some way.
    testForMacroMesh();

    // create an initial partitioning of the mesh
    partitioner->createPartitionData();
    // set the element weights, which are all 1 at the very beginning
    setElemWeights(adaptInfo);
    // and now partition the mesh
    partitionMesh(adaptInfo);   

#if (DEBUG != 0)
    ElementIdxToDofs elMap;
    dbgCreateElementMap(elMap);

    if (mpiRank == 0)
      writePartitioningMesh("part.vtu");
#endif

    // === Create new global and local DOF numbering. ===

    // Set of all DOFs of the rank.
    std::vector<const DegreeOfFreedom*> rankDofs;
    // Number of DOFs in the rank's partition that are owned by the rank.
    nRankDofs = 0;
    // Number of all DOFs in the macro mesh.
    int nOverallDOFs = 0;

    createLocalGlobalNumbering(rankDofs, nRankDofs, nOverallDOFs);

    // === Create interior boundary information ===

    createInteriorBoundaryInfo();

    // === Remove all macro elements that are not part of the rank partition. ===

    removeMacroElements();

#if (DEBUG != 0)
    dbgTestElementMap(elMap);
    dbgTestInteriorBoundary();
#endif

    // === Reset all DOFAdmins of the mesh. ===

    updateDofAdmins();

    // === Create periodic dof mapping, if there are periodic boundaries. ===

    createPeriodicMap();

    // === Global refinements. ===

    int globalRefinement = 0;
    GET_PARAMETER(0, mesh->getName() + "->global refinements", "%d", &globalRefinement);

    if (globalRefinement > 0) {
      refinementManager->globalRefine(mesh, globalRefinement);

#if (DEBUG != 0)
      elMap.clear();
      dbgCreateElementMap(elMap);
#endif

      updateLocalGlobalNumbering(nRankDofs, nOverallDOFs);

      updateDofAdmins();

#if (DEBUG != 0)
      dbgTestElementMap(elMap);
#endif

      // === Update periodic mapping, if there are periodic boundaries. ===
      createPeriodicMap();
    }

    lastMeshChangeIndex = mesh->getChangeIndex();

#if (DEBUG != 0)
    dbgTestCommonDofs(true);
#endif

    nRankRows = nRankDofs * nComponents;
    nOverallRows = nOverallDOFs * nComponents;
  }


  void ParallelDomainBase::exitParallelization(AdaptInfo *adaptInfo)
  {}

  
  void ParallelDomainBase::updateDofAdmins()
  {
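    // Mark all DOFs of every DOFAdmin as free, then mark the DOFs that occur in
    // mapLocalGlobalDOFs as used again and adjust the admin's size counters to the
    // new local DOF numbering.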
    int nAdmins = mesh->getNumberOfDOFAdmin();
    for (int i = 0; i < nAdmins; i++) {
      DOFAdmin& admin = const_cast<DOFAdmin&>(mesh->getDOFAdmin(i));
      
      for (int j = 0; j < admin.getSize(); j++)
	admin.setDOFFree(j, true);
      for (int j = 0; j < static_cast<int>(mapLocalGlobalDOFs.size()); j++)
 	admin.setDOFFree(j, false);

      admin.setUsedSize(mapLocalGlobalDOFs.size());
      admin.setUsedCount(mapLocalGlobalDOFs.size());
      admin.setFirstHole(mapLocalGlobalDOFs.size());
    }
  }


  void ParallelDomainBase::testForMacroMesh()
  {
    FUNCNAME("ParallelDomainBase::testForMacroMesh()");

    int nMacroElements = 0;

    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, -1, Mesh::CALL_LEAF_EL);
    while (elInfo) {
      TEST_EXIT(elInfo->getLevel() == 0)
	("Mesh is already refined! This does not work with parallelization!\n");
      
      nMacroElements++;

      elInfo = stack.traverseNext(elInfo);
    }

    TEST_EXIT(nMacroElements >= mpiSize)
      ("The mesh has less macro elements than number of mpi processes!\n");
  }


  void ParallelDomainBase::setDofMatrix(DOFMatrix* mat, int dispMult, 
					int dispAddRow, int dispAddCol)
  {
    FUNCNAME("ParallelDomainBase::setDofMatrix()");

    TEST_EXIT(mat)("No DOFMatrix!\n");

    using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits= mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    traits::col<Matrix>::type col(mat->getBaseMatrix());
    traits::const_value<Matrix>::type value(mat->getBaseMatrix());

    typedef traits::range_generator<row, Matrix>::type cursor_type;
    typedef traits::range_generator<nz, cursor_type>::type icursor_type;

    std::vector<int> cols;
    std::vector<double> values;
    cols.reserve(300);
    values.reserve(300);

    // === Traverse all rows of the dof matrix and insert the values row-wise ===
    // === into the petsc matrix.                                             ===

    for (cursor_type cursor = begin<row>(mat->getBaseMatrix()), 
	   cend = end<row>(mat->getBaseMatrix()); cursor != cend; ++cursor) {

      cols.clear();
      values.clear();

      // Global index of the current row dof.
      int globalRowDof = mapLocalGlobalDOFs[*cursor];
      // Test if the current row dof is a periodic dof.
      bool periodicRow = (periodicDof.count(globalRowDof) > 0);


      // === Traverse all non zero entries of the row and produce vector cols ===
      // === with the column indices of all row entries and vector values     ===
      // === with the corresponding values.                                   ===

      for (icursor_type icursor = begin<nz>(cursor), icend = end<nz>(cursor); 
	   icursor != icend; ++icursor) {

	// Insert only non-zero values.
	if (value(*icursor) != 0.0) {
	  // Global index of the current column dof.
	  int globalColDof = mapLocalGlobalDOFs[col(*icursor)];
	  // Calculate the exact position of the column index in the petsc matrix.
	  int colIndex = globalColDof * dispMult + dispAddCol;

	  // If the current row is not periodic, but the current dof index is periodic,
	  // we have to duplicate the value to the other corresponding periodic columns.
 	  if (periodicRow == false && periodicDof.count(globalColDof) > 0) {
	    // The value is assigned to n matrix entries, therefore every entry 
	    // gets only 1/n of the original value.
	    double scalFactor = 1.0 / (periodicDof[globalColDof].size() + 1.0);

	    // Insert original entry.
 	    cols.push_back(colIndex);
 	    values.push_back(value(*icursor) * scalFactor);

	    // Insert the periodic entries.
 	    for (std::set<DegreeOfFreedom>::iterator it = 
		   periodicDof[globalColDof].begin();
 		 it != periodicDof[globalColDof].end(); ++it) {
 	      cols.push_back(*it * dispMult + dispAddCol);
 	      values.push_back(value(*icursor) * scalFactor);
	    }
 	  } else {
	    // Neither row nor column dof index is periodic, simply add the entry.
	    cols.push_back(colIndex);
	    values.push_back(value(*icursor));
	  }
	}
      }


      // === Up to now we have assembled one row. Now the row must be sent to ===
      // === the corresponding rows of the petsc matrix.                      ===

      // Calculate petsc row index.
      int rowIndex = globalRowDof * dispMult + dispAddRow;
      
      if (periodicRow) {
	// The row dof is periodic, so the row is sent to all corresponding periodic rows.

	double scalFactor = 1.0 / (periodicDof[globalRowDof].size() + 1.0);
	
	int diagIndex = -1;
	for (int i = 0; i < static_cast<int>(values.size()); i++) {
	  // Change only the non diagonal values in the col. For the diagonal test
	  // we compare the global dof indices of the dof matrix (not of the petsc
	  // matrix!).
	  if ((cols[i] - dispAddCol) / dispMult != globalRowDof)
	    values[i] *= scalFactor;
	  else
	    diagIndex = i;
	}
	
	// Send the main row to the petsc matrix.
	MatSetValues(petscMatrix, 1, &rowIndex, cols.size(), 
		     &(cols[0]), &(values[0]), ADD_VALUES);	
 
	// Set diagonal element to zero, i.e., the diagonal element of the current
	// row is not sent to the periodic row indices.
	if (diagIndex != -1)
	  values[diagIndex] = 0.0;

	// Send the row to all periodic row indices.
	for (std::set<DegreeOfFreedom>::iterator it = periodicDof[globalRowDof].begin();
	     it != periodicDof[globalRowDof].end(); ++it) {
	  int perRowIndex = *it * dispMult + dispAddRow;
	  MatSetValues(petscMatrix, 1, &perRowIndex, cols.size(), 
		       &(cols[0]), &(values[0]), ADD_VALUES);
	}

      } else {
	// The row dof is not periodic, simply send the row to the petsc matrix.
	MatSetValues(petscMatrix, 1, &rowIndex, cols.size(), 
		     &(cols[0]), &(values[0]), ADD_VALUES);
      }    
    }
  }


  void ParallelDomainBase::setDofVector(Vec& petscVec, DOFVector<double>* vec, 
					int dispMult, int dispAdd)
  {
    // Traverse all used dofs in the dof vector.
    DOFVector<double>::Iterator dofIt(vec, USED_DOFS);
    for (dofIt.reset(); !dofIt.end(); ++dofIt) {
      // Calculate global row index of the dof.
      int globalRow = mapLocalGlobalDOFs[dofIt.getDOFIndex()];
      // Calculate petsc index of the row dof.
      int index = globalRow * dispMult + dispAdd;

      if (periodicDof.count(globalRow) > 0) {
	// The dof index is periodic, so divide the value among all corresponding dof entries.

	double value = *dofIt / (periodicDof[globalRow].size() + 1.0);
	VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);

	for (std::set<DegreeOfFreedom>::iterator it = periodicDof[globalRow].begin();
	     it != periodicDof[globalRow].end(); ++it) {
	  index = *it * dispMult + dispAdd;
	  VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);
	}

      } else {
	// The dof index is not periodic.
	double value = *dofIt;
	VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);
      }
    }    
  }


  void ParallelDomainBase::fillPetscMatrix(Matrix<DOFMatrix*> *mat, SystemVector *vec)
  {
    FUNCNAME("ParallelDomainBase::fillPetscMatrix()");

    clock_t first = clock();

    VecCreate(PETSC_COMM_WORLD, &petscRhsVec);
    VecSetSizes(petscRhsVec, nRankRows, nOverallRows);
    VecSetType(petscRhsVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscSolVec);
    VecSetSizes(petscSolVec, nRankRows, nOverallRows);
    VecSetType(petscSolVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscTmpVec);
    VecSetSizes(petscTmpVec, nRankRows, nOverallRows);
    VecSetType(petscTmpVec, VECMPI);

    using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits= mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

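    // d_nnz[i] and o_nnz[i] count the non-zero entries of local row i in the
    // diagonal block (columns owned by this rank) and in the off-diagonal block
    // (columns owned by other ranks); they are used below to preallocate the
    // PETSc matrix in MatCreateMPIAIJ.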
    int d_nnz[nRankRows];
    int o_nnz[nRankRows];

    std::map<int, std::vector< std::pair<int, int> > > sendMatrixEntry;

    for (int i = 0; i < nRankRows; i++) {
      d_nnz[i] = 0;
      o_nnz[i] = 0;
    }

    for (int i = 0; i < nComponents; i++) {
      for (int j = 0; j < nComponents; j++) {
 	if ((*mat)[i][j]) {
	  Matrix bmat = (*mat)[i][j]->getBaseMatrix();

	  traits::col<Matrix>::type col(bmat);
	  traits::const_value<Matrix>::type value(bmat);
	  
	  typedef traits::range_generator<row, Matrix>::type cursor_type;
	  typedef traits::range_generator<nz, cursor_type>::type icursor_type;
	  
	  for (cursor_type cursor = begin<row>(bmat), 
		 cend = end<row>(bmat); cursor != cend; ++cursor) {

	    int r = mapLocalGlobalDOFs[*cursor] * nComponents + i;

	    if (isRankDof[*cursor]) {
	      r -= rstart * nComponents;

#if (DEBUG != 0)    
	      if (r < 0 || r >= nRankRows) {
		std::cout << "ERROR in rank: " << mpiRank << std::endl;
		std::cout << "  Wrong r = " << r << " " << *cursor << " " 
			  << mapLocalGlobalDOFs[*cursor] << " " 
			  << nRankRows << std::endl;
		ERROR_EXIT("Should not happen!\n");
	      }
#endif
	      
	      for (icursor_type icursor = begin<nz>(cursor), 
		     icend = end<nz>(cursor); icursor != icend; ++icursor) {
		if (value(*icursor) != 0.0) {
		  int c = mapLocalGlobalDOFs[col(*icursor)] * nComponents + j;

		  if (c >= rstart * nComponents && 
		      c < rstart * nComponents + nRankRows)
		    d_nnz[r]++;
		  else
		    o_nnz[r]++;		  
		}    
	      }
	    } else {
	      int sendToRank = -1;

	      for (RankToDofContainer::iterator it = recvDofs.begin();
		   it != recvDofs.end(); ++it) {
		for (DofContainer::iterator dofIt = it->second.begin();
		     dofIt != it->second.end(); ++dofIt) {
		  if (**dofIt == *cursor) {
		    sendToRank = it->first;
		    break;
		  }
		}

		if (sendToRank != -1)
		  break;
	      }

	      TEST_EXIT_DBG(sendToRank != -1)("Should not happen!\n");

	      for (icursor_type icursor = begin<nz>(cursor), 
		     icend = end<nz>(cursor); icursor != icend; ++icursor) {
		if (value(*icursor) != 0.0) {
		  int c = mapLocalGlobalDOFs[col(*icursor)] * nComponents + j;
		  
		  sendMatrixEntry[sendToRank].push_back(std::make_pair(r, c));
		}
	      }
	    }
	  }
	}
      }
    }

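    // === Matrix entries of rows that are owned by other ranks have been collected ===
    // === in sendMatrixEntry. Exchange them with the neighbouring ranks: first the ===
    // === number of entries is communicated, then the (row, column) pairs, so the  ===
    // === owning ranks can include them in their preallocation counts.             ===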
    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;

    std::vector<int*> sendBuffers;
    sendBuffers.reserve(recvDofs.size());

    for (RankToDofContainer::iterator it = recvDofs.begin(); 
	 it != recvDofs.end(); ++it) {
      int nSend = sendMatrixEntry[it->first].size();

      request[requestCounter++] = mpiComm.Isend(&nSend, 1, MPI_INT, it->first, 0);
      
      if (nSend > 0) {
	int *sendbuf = new int[nSend * 2];
	for (int i = 0; i < nSend; i++) {
	  sendbuf[i * 2] = sendMatrixEntry[it->first][i].first;
	  sendbuf[i * 2 + 1] = sendMatrixEntry[it->first][i].second;
	}
	sendBuffers.push_back(sendbuf);
      } else {
	sendBuffers.push_back(NULL);
      }
    }

    std::vector<int> recvSize;
    recvSize.reserve(sendDofs.size());
    
    int i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it)
      request[requestCounter++] = 
	mpiComm.Irecv(&(recvSize[i++]), 1, MPI_INT, it->first, 0);

    MPI::Request::Waitall(requestCounter, request);

    requestCounter = 0;

    i = 0;
    for (RankToDofContainer::iterator it = recvDofs.begin(); 
	 it != recvDofs.end(); ++it) {
      int nSend = sendMatrixEntry[it->first].size();

      if (nSend > 0)
	request[requestCounter++] = 
	  mpiComm.Isend(sendBuffers[i], nSend * 2, MPI_INT, it->first, 0);

      i++;
    }

    std::vector<int*> recvBuffers;
    recvBuffers.reserve(sendDofs.size());
    
    i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it) {
      if (recvSize[i] > 0) {
	int *recvbuf = new int[recvSize[i] * 2];
	recvBuffers.push_back(recvbuf);

	request[requestCounter++] =
	  mpiComm.Irecv(recvbuf, recvSize[i] * 2, MPI_INT, it->first, 0);
      } else {
	recvBuffers.push_back(NULL);
      }

      i++;
    }

    MPI::Request::Waitall(requestCounter, request);

    for (int j = 0; j < static_cast<int>(sendBuffers.size()); j++)
      if (sendBuffers[j])
 	delete [] sendBuffers[j];

    i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it) {
      if (recvSize[i] > 0) {
	for (int j = 0; j < recvSize[i]; j++) {
	  int r = recvBuffers[i][j * 2];
	  int c = recvBuffers[i][j * 2 + 1];

	  r -= rstart * nComponents;

	  TEST_EXIT_DBG(r >= 0 && r < nRankRows)("Should not happen!\n");
	  
	  if (c < rstart * nComponents || c >= rstart * nComponents + nRankRows)
	    o_nnz[r]++;
	  else
	    d_nnz[r]++;
	}

	delete [] recvBuffers[i];
      }

      i++;
    }

    MatCreateMPIAIJ(PETSC_COMM_WORLD, nRankRows, nRankRows, nOverallRows, nOverallRows,
		    0, d_nnz, 0, o_nnz, &petscMatrix);

#if (DEBUG != 0)
    int a, b;
    MatGetOwnershipRange(petscMatrix, &a, &b);
    TEST_EXIT(a == rstart * nComponents)("Wrong matrix ownership range!\n");
    TEST_EXIT(b == rstart * nComponents + nRankRows)("Wrong matrix ownership range!\n");
#endif

    using mtl::tag::major; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits= mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

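    // === Transfer the values of all DOF matrices into the PETSc matrix. ===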
    for (int i = 0; i < nComponents; i++)
      for (int j = 0; j < nComponents; j++)
	if ((*mat)[i][j])
	  setDofMatrix((*mat)[i][j], nComponents, i, j);

    MatAssemblyBegin(petscMatrix, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(petscMatrix, MAT_FINAL_ASSEMBLY);

    for (int i = 0; i < nComponents; i++)
      setDofVector(petscRhsVec, vec->getDOFVector(i), nComponents, i);

    VecAssemblyBegin(petscRhsVec);
    VecAssemblyEnd(petscRhsVec);

    INFO(info, 8)("Fill petsc matrix needed %.5f seconds\n", TIME_USED(first, clock()));
  }


  void ParallelDomainBase::solvePetscMatrix(SystemVector &vec)
  {
    FUNCNAME("ParallelDomainBase::solvePetscMatrix()");

#if 0
    // Set old solution to be initial guess for petsc solver.
    for (int i = 0; i < nComponents; i++)
      setDofVector(petscSolVec, vec->getDOFVector(i), nComponents, i);

    VecAssemblyBegin(petscSolVec);
    VecAssemblyEnd(petscSolVec);
#endif

    // === Init Petsc solver. ===

    KSP solver;
    KSPCreate(PETSC_COMM_WORLD, &solver);
    KSPSetOperators(solver, petscMatrix, petscMatrix, SAME_NONZERO_PATTERN); 
    KSPSetTolerances(solver, 0.0, 1e-8, PETSC_DEFAULT, PETSC_DEFAULT);
    KSPSetType(solver, KSPBCGS);
    KSPMonitorSet(solver, myKSPMonitor, PETSC_NULL, 0);
    KSPSetFromOptions(solver);
    // Do not delete the solution vector, use it for the initial guess.
    //    KSPSetInitialGuessNonzero(solver, PETSC_TRUE);


    // === Run Petsc. ===

    KSPSolve(solver, petscRhsVec, petscSolVec);

    // === Transfer the values from Petsc's solution vector to the dof vectors. ===
    PetscScalar *vecPointer;
    VecGetArray(petscSolVec, &vecPointer);

    for (int i = 0; i < nComponents; i++) {
      DOFVector<double> *dofvec = vec.getDOFVector(i);
      for (int j = 0; j < nRankDofs; j++)
	(*dofvec)[mapLocalToDofIndex[j]] = vecPointer[j * nComponents + i];      
    }

    VecRestoreArray(petscSolVec, &vecPointer);


    // === Synchronize dofs at common dofs, i.e., dofs that correspond to more ===
    // === than one partition.                                                 ===
    synchVectors(vec);


    // === Print information about solution process. ===

    int iterations = 0;
    KSPGetIterationNumber(solver, &iterations);
    MSG("  Number of iterations: %d\n", iterations);
    
    double norm = 0.0;
    MatMult(petscMatrix, petscSolVec, petscTmpVec);
    VecAXPY(petscTmpVec, -1.0, petscRhsVec);
    VecNorm(petscTmpVec, NORM_2, &norm);
    MSG("  Residual norm: %e\n", norm);


    // === Destroy Petsc's variables. ===

    MatDestroy(petscMatrix);
    VecDestroy(petscRhsVec);
    VecDestroy(petscSolVec);
    VecDestroy(petscTmpVec);
    KSPDestroy(solver);
  }
  
  void ParallelDomainBase::synchVectors(SystemVector &vec)
  {
#if 0
    StdMpi<std::vector<double>, std::vector<double> > stdMpi(mpiComm);
    stdMpi.prepareCommunication(false);

    for (RankToDofContainer::iterator sendIt = sendDofs.begin();
	 sendIt != sendDofs.end(); ++sendIt, i++) {
      std::vector<double> dofs;
      int nSendDOFs = sendIt->second.size();
      dofs.reserve(nComponents * sendIt->second.size());
      
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
	for (int k = 0; k < nSendDOFs; k++)
	  dofs.push_back((*dofvec)[*((sendIt->second)[k])]);
      }

      stdMpi.send(sendIt->first, dofs);
    }

    stdMpi.startCommunication<int>();
#endif

#if 1

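    // === Copy the values of all DOFs listed in sendDofs into send buffers, ===
    // === exchange the buffers with the neighbouring ranks, and write the   ===
    // === received values to the DOFs listed in recvDofs.                   ===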
    std::vector<double*> sendBuffers(sendDofs.size());
    std::vector<double*> recvBuffers(recvDofs.size());

    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;
    
    int i = 0;
    for (RankToDofContainer::iterator sendIt = sendDofs.begin();
	 sendIt != sendDofs.end(); ++sendIt, i++) {
      int nSendDOFs = sendIt->second.size();
      sendBuffers[i] = new double[nSendDOFs * nComponents];

      int counter = 0;
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
	for (int k = 0; k < nSendDOFs; k++)
	  sendBuffers[i][counter++] = (*dofvec)[*((sendIt->second)[k])];
      }

      request[requestCounter++] = mpiComm.Isend(sendBuffers[i], nSendDOFs * nComponents,
						MPI_DOUBLE, sendIt->first, 0);
    }

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size() * nComponents;
      recvBuffers[i] = new double[nRecvDOFs];

      request[requestCounter++] =
	mpiComm.Irecv(recvBuffers[i], nRecvDOFs, MPI_DOUBLE, recvIt->first, 0);
    }


    MPI::Request::Waitall(requestCounter, request);

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size();

      int counter = 0;
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
 	for (int k = 0; k < nRecvDOFs; k++)
	  (*dofvec)[*(recvIt->second)[k]] = recvBuffers[i][counter++];
      }

      delete [] recvBuffers[i];
    }
    
    for (int i = 0; i < static_cast<int>(sendBuffers.size()); i++)
      delete [] sendBuffers[i];
#endif
  }


  void ParallelDomainBase::checkMeshChange()
  {
    // === If mesh has not been changed, return. ===

    if (mesh->getChangeIndex() == lastMeshChangeIndex)
      return;


    // === Create mesh structure codes for all of the rank's boundary elements. ===

    typedef std::vector<MeshStructure> MeshCodeVec;
    std::map<int, MeshCodeVec > sendCodes;

    for (RankToBoundMap::iterator it = myIntBoundary.boundary.begin();
	 it != myIntBoundary.boundary.end(); ++it) {    

      for (std::vector<AtomicBoundary>::iterator boundIt = it->second.begin();
	   boundIt != it->second.end(); ++boundIt) {

	MeshStructure elCode;
	elCode.init(boundIt->rankObj.el, boundIt->rankObj.ithObj, 
		    boundIt->rankObj.elType);
	sendCodes[it->first].push_back(elCode);
      }
    }

    StdMpi<MeshCodeVec, MeshCodeVec> stdMpi(mpiComm);
    stdMpi.prepareCommunication(true);
    stdMpi.send(sendCodes);

    for (RankToBoundMap::iterator it = otherIntBoundary.boundary.begin();
	 it != otherIntBoundary.boundary.end(); ++it)
      stdMpi.recv(it->first);

    stdMpi.startCommunication<unsigned long int>();

      
#if 0
    for (RankToBoundMap::iterator it = otherIntBoundary.boundary.begin();
	 it != otherIntBoundary.boundary.end(); ++it) {

      for (std::vector<AtomicBoundary>::iterator boundIt = it->second.begin();
	   boundIt != it->second.end(); ++boundIt) {

	MeshStructure elCode;
	elCode.init(boundIt->rankObj.el, 
		    boundIt->rankObj.ithObj, 
		    boundIt->rankObj.elType);
      }
    }
#endif

    std::cout << "MESH HAS BEEN CHANGED!" << std::endl;
    exit(0);    

    lastMeshChangeIndex = mesh->getChangeIndex();
  }

  
  void ParallelDomainBase::serialize(std::ostream &out, DofContainer &data)
  {
    int vecSize = data.size();
    SerUtil::serialize(out, vecSize);
    for (int i = 0; i < vecSize; i++) {
      int dofIndex = (*data[i]);
      SerUtil::serialize(out, dofIndex);
    }
  }


  void ParallelDomainBase::deserialize(std::istream &in, DofContainer &data,
				       std::map<int, const DegreeOfFreedom*> &dofMap)
  {
    FUNCNAME("ParallelDomainBase::deserialize()");

    int vecSize = 0;
    SerUtil::deserialize(in, vecSize);
    data.resize(vecSize);
    for (int i = 0; i < vecSize; i++) {
      int dofIndex = 0;
      SerUtil::deserialize(in, dofIndex);

      TEST_EXIT_DBG(dofMap.count(dofIndex) != 0)
	("Dof index could not be deserialized correctly!\n");

      data[i] = dofMap[dofIndex];
    }
  }


  void ParallelDomainBase::serialize(std::ostream &out, RankToDofContainer &data)
  {
    int mapSize = data.size();
    SerUtil::serialize(out, mapSize);
    for (RankToDofContainer::iterator it = data.begin(); it != data.end(); ++it) {
      int rank = it->first;
      SerUtil::serialize(out, rank);
      serialize(out, it->second);
    }
  }

  
  void ParallelDomainBase::deserialize(std::istream &in, RankToDofContainer &data,
				       std::map<int, const DegreeOfFreedom*> &dofMap)
  {
    int mapSize = 0;
    SerUtil::deserialize(in, mapSize);
    for (int i = 0; i < mapSize; i++) {
      int rank = 0;
      SerUtil::deserialize(in, rank);
      deserialize(in, data[rank], dofMap);      
    }
  }


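  // Sets for every macro element of the rank's partition a weight equal to the
  // number of leaf elements it contains and returns the sum of all local weights.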
  double ParallelDomainBase::setElemWeights(AdaptInfo *adaptInfo) 
  {
    double localWeightSum = 0.0;
    int elNum = -1;

    elemWeights.clear();

    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, -1,
					 Mesh::CALL_EVERY_EL_PREORDER);
    while (elInfo) {
      Element *element = elInfo->getElement();

      // get partition data
      PartitionElementData *partitionData = dynamic_cast<PartitionElementData*>
	(element->getElementData(PARTITION_ED));

      if (partitionData && partitionData->getPartitionStatus() == IN) {
	if (partitionData->getLevel() == 0)
	  elNum = element->getIndex();
	
	TEST_EXIT_DBG(elNum != -1)("invalid element number\n");
	if (element->isLeaf()) {
	  elemWeights[elNum] += 1.0;
	  localWeightSum += 1.0;
	}
      }

      elInfo = stack.traverseNext(elInfo);
    }

    return localWeightSum;
  }


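  // Computes the mesh partitioning with ParMetis: the first call creates an
  // initial partitioning, all later calls perform an adaptive repartitioning
  // based on the current element weights.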
  void ParallelDomainBase::partitionMesh(AdaptInfo *adaptInfo)
  {
    if (initialPartitionMesh) {
      initialPartitionMesh = false;
      partitioner->fillCoarsePartitionVec(&oldPartitionVec);
      partitioner->partition(&elemWeights, INITIAL);
    } else {
      oldPartitionVec = partitionVec;
      partitioner->partition(&elemWeights, ADAPTIVE_REPART, 100.0 /*0.000001*/);
    }    

    partitioner->fillCoarsePartitionVec(&partitionVec);
  }


  void ParallelDomainBase::createInteriorBoundaryInfo()
  {
    FUNCNAME("ParallelDomainBase::createInteriorBoundaryInfo()");

    int nNeighbours = mesh->getGeo(NEIGH);
    int dim = mesh->getDim();
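    // Interior boundaries are built from edges in 2D and from faces in 3D.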
    GeoIndex subObj = CENTER;
    switch (dim) {
    case 2:
      subObj = EDGE;
      break;
    case 3:
      subObj = FACE;
      break;
    default:
      ERROR_EXIT("What is this?\n");
    }     

    // === First, traverse the mesh and search for all elements that have a   ===
    // === boundary with an element within another partition.                 ===

    TraverseStack stack;
    ElInfo *elInfo = 
      stack.traverseFirst(mesh, -1, 
			  Mesh::CALL_LEAF_EL | Mesh::FILL_NEIGH | Mesh::FILL_BOUND);
    while (elInfo) {
      Element *element = elInfo->getElement();

      PartitionElementData *partitionData = 
	dynamic_cast<PartitionElementData*>(element->getElementData(PARTITION_ED));   

      // Check whether the element is within the rank's partition.
      if (partitionData->getPartitionStatus() == IN) {
	for (int i = 0; i < nNeighbours; i++) {
	  if (!elInfo->getNeighbour(i))
	    continue;

	  PartitionElementData *neighbourPartitionData =
	    dynamic_cast<PartitionElementData*>(elInfo->getNeighbour(i)->
						getElementData(PARTITION_ED));

 	  if (neighbourPartitionData->getPartitionStatus() == OUT) {

	    // We have found an element that is in rank's partition, but has a 
	    // neighbour outside of the rank's partition.

	    // === Create information about the boundary between the two elements. ===

	    AtomicBoundary bound;	    	    
	    bound.rankObj.el = element;
	    bound.rankObj.elIndex = element->getIndex();
	    bound.rankObj.elType = elInfo->getType();
	    bound.rankObj.subObj = subObj;
	    bound.rankObj.ithObj = i;
	    // Do not set a pointer to the element, because it will be deleted from
	    // the mesh after partitioning and the pointer would become invalid.
	    bound.neighObj.el = NULL;
	    bound.neighObj.elIndex = elInfo->getNeighbour(i)->getIndex();
	    bound.neighObj.elType = -1;
	    bound.neighObj.subObj = subObj;
	    bound.neighObj.ithObj = elInfo->getSideOfNeighbour(i);
	    
	    if (dim == 2) {
	      // i == 2  =>  getSideOfNeighbour(i) == 2
	      TEST_EXIT_DBG(i != 2 || elInfo->getSideOfNeighbour(i) == 2)
		("Should not happen!\n");
	    }

	    // Get rank number of the neighbouring element.
	    int otherElementRank = partitionVec[elInfo->getNeighbour(i)->getIndex()];


	    // === Add the boundary information object to the corresponding overall ===
	    // === boundary. There are three possibilities: if the boundary is a    ===
	    // === periodic boundary, just add it to \ref periodicBoundary. Here it ===
	    // === does not matter which rank is responsible for this boundary.     ===
	    // === Otherwise, the boundary is added either to \ref myIntBoundary or ===
	    // === to \ref otherIntBoundary. It depends on which rank is respon-    ===
	    // === sible for this boundary.                                         ===

	    if (BoundaryManager::isBoundaryPeriodic(elInfo->getBoundary(subObj, i))) {	      
	      // We have found an element that is at an interior, periodic boundary.
	      AtomicBoundary& b = periodicBoundary.getNewAtomic(otherElementRank);
	      b = bound;
	    } else {
	      // We have found an element that is at an interior, non-periodic boundary.
	      AtomicBoundary& b = ((mpiRank > otherElementRank) ?
				   myIntBoundary.getNewAtomic(otherElementRank) :
				   otherIntBoundary.getNewAtomic(otherElementRank));
	      b = bound;	      
	    }
 	  }
	}
      }

      elInfo = stack.traverseNext(elInfo);
    }


    // === Once we have this information, we must care about the order of the atomic ===
    // === bounds in the three boundary handling objects. In the end all the bound-  ===
    // === aries have to be in the same order on both ranks that share the boundary. ===

    std::vector<int*> sendBuffers, recvBuffers;

    // First we communicate myInt/otherIntBoundary, and then the periodic boundaries.
    MPI::Request request[max(myIntBoundary.boundary.size() + 
			     otherIntBoundary.boundary.size(),
			     periodicBoundary.boundary.size())];
    int requestCounter = 0;


    // === The owners of the boundaries send their atomic boundary order to the ===
    // === neighbouring ranks.                                                  ===

    for (RankToBoundMap::iterator rankIt = myIntBoundary.boundary.begin();
	 rankIt != myIntBoundary.boundary.end(); ++rankIt) {    
      int nSendInt = rankIt->second.size();
      int* buffer = new int[nSendInt * 2];
      for (int i = 0; i < nSendInt; i++) {
	buffer[i * 2] = (rankIt->second)[i].rankObj.elIndex;
	buffer[i * 2 + 1] = (rankIt->second)[i].rankObj.ithObj;
      }
      sendBuffers.push_back(buffer);
      
      request[requestCounter++] =
	mpiComm.Isend(buffer, nSendInt * 2, MPI_INT, rankIt->first, 0);
    }

    for (RankToBoundMap::iterator rankIt = otherIntBoundary.boundary.begin();
	 rankIt != otherIntBoundary.boundary.end(); ++rankIt) {
      int nRecvInt = rankIt->second.size();
      int *buffer = new int[nRecvInt * 2];
      recvBuffers.push_back(buffer);

      request[requestCounter++] = 
	mpiComm.Irecv(buffer, nRecvInt * 2, MPI_INT, rankIt->first, 0);
    }


    MPI::Request::Waitall(requestCounter, request);

    
    // === The information about all neighbouring boundaries has been received. So ===
    // === the rank tests if its own atomic boundaries are in the same order. If   ===
    // === not, the atomic boundaries are swapped to the correct order.            ===

    int i = 0;
    for (RankToBoundMap::iterator rankIt = otherIntBoundary.boundary.begin();
	 rankIt != otherIntBoundary.boundary.end(); ++rankIt) {

      // === We have received from rank "rankIt->first" the ordered list of element ===
      // === indices. We now have to sort the corresponding list in this rank to    ===
      // === get the same order.                                                    ===
      
      for (int j = 0; j < static_cast<int>(rankIt->second.size()); j++) {
	// If the expected object is not in place, search for it.
	if ((rankIt->second)[j].neighObj.elIndex != recvBuffers[i][j * 2] ||
	    (rankIt->second)[j].neighObj.ithObj != recvBuffers[i][j * 2 + 1]) {
	  int k = j + 1;
	  for (; k < static_cast<int>(rankIt->second.size()); k++)
	    if ((rankIt->second)[k].neighObj.elIndex == recvBuffers[i][j * 2] && 
		(rankIt->second)[k].neighObj.ithObj == recvBuffers[i][j * 2 + 1])
	      break;

	  // The element must always be found, because the list is just in another order.
	  TEST_EXIT_DBG(k < static_cast<int>(rankIt->second.size()))
	    ("Should never happen!\n");

	  // Swap the current with the found element.
	  AtomicBoundary tmpBound = (rankIt->second)[k];
	  (rankIt->second)[k] = (rankIt->second)[j];
	  (rankIt->second)[j] = tmpBound;	
	}
      }

      delete [] recvBuffers[i];
      i++;
    }

    for (int i = 0; i < static_cast<int>(sendBuffers.size()); i++)
      delete [] sendBuffers[i];

    recvBuffers.clear();
    sendBuffers.clear();
 

    // === Do the same for the periodic boundaries. ===

    requestCounter = 0;

    for (RankToBoundMap::iterator rankIt = periodicBoundary.boundary.begin();
	 rankIt != periodicBoundary.boundary.end(); ++rankIt) {

      TEST_EXIT_DBG(rankIt->first != mpiRank)
	("It is not possible to have an interior boundary within a rank partition!\n");

      if (rankIt->first < mpiRank) {
	int nSendInt = rankIt->second.size();
	int* buffer = new int[nSendInt * 2];
	for (int i = 0; i < nSendInt; i++) {