#include <algorithm>
#include <boost/lexical_cast.hpp>

#include "ParallelDomainBase.h"
#include "ParMetisPartitioner.h"
#include "Mesh.h"
#include "Traverse.h"
#include "ElInfo.h"
#include "Element.h"
#include "MacroElement.h"
#include "PartitionElementData.h"
#include "DOFMatrix.h"
#include "DOFVector.h"
#include "SystemVector.h"
#include "VtkWriter.h"
#include "ElementDofIterator.h"
#include "ProblemStatBase.h"
#include "StandardProblemIteration.h"
#include "ElementFileWriter.h"
#include "VertexVector.h"
#include "StdMpi.h"
#include "MeshStructure.h"

#include "petscksp.h"

namespace AMDiS {
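
  // Custom KSP convergence monitor: prints the current residual norm on rank 0,
  // once every 100 iterations.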

  PetscErrorCode myKSPMonitor(KSP ksp, PetscInt iter, PetscReal rnorm, void *)
  {    
    if (iter % 100 == 0 && MPI::COMM_WORLD.Get_rank() == 0)
      std::cout << "  Petsc-Iteration " << iter << ": " << rnorm << std::endl;

    return 0;
  }

  inline bool cmpDofsByValue(const DegreeOfFreedom* dof1, const DegreeOfFreedom* dof2)
  {
    return (*dof1 < *dof2);
  }

  ParallelDomainBase::ParallelDomainBase(ProblemIterationInterface *iIF,
					 ProblemTimeInterface *tIF,
					 FiniteElemSpace *fe,
					 RefinementManager *refinementManager)
    : iterationIF(iIF),
      timeIF(tIF),
      name(iIF->getName()),
      feSpace(fe),
      mesh(fe->getMesh()),
      refineManager(refinementManager),
      initialPartitionMesh(true),
      nRankDofs(0),
      rstart(0),
      nComponents(1),
      deserialized(false),
      lastMeshChangeIndex(0)
  {
    FUNCNAME("ParallelDomainBase::ParalleDomainBase()");

    TEST_EXIT(mesh->getNumberOfDOFAdmin() == 1)
      ("Only meshes with one DOFAdmin are supported!\n");
    TEST_EXIT(mesh->getDOFAdmin(0).getNumberOfPreDOFs(0) == 0)
      ("Wrong pre dof number for DOFAdmin!\n");

    mpiRank = MPI::COMM_WORLD.Get_rank();
    mpiSize = MPI::COMM_WORLD.Get_size();
    mpiComm = MPI::COMM_WORLD;
    partitioner = new ParMetisPartitioner(mesh, &mpiComm);
  }

  void ParallelDomainBase::initParallelization(AdaptInfo *adaptInfo)
  {
    FUNCNAME("ParallelDomainBase::initParallelization()");

    TEST_EXIT(mpiSize > 1)
      ("Parallelization does not work with only one process!\n");

    // If the problem has already been read from a file, we do not need to do anything.
    if (deserialized)
      return;

    // Test whether the mesh is the macro mesh only. Partitioning of the mesh is
    // supported only for macro meshes, so it will not work yet if the mesh has
    // already been refined in some way.
    testForMacroMesh();

    MSG("START PARTITIONING!\n");

    // create an initial partitioning of the mesh
    partitioner->createPartitionData();
    // set the element weights, which are all 1 at the very beginning
    setElemWeights(adaptInfo);
    // and now partition the mesh
    partitionMesh(adaptInfo);   

#if (DEBUG != 0)
    ElementIdxToDofs elMap;
    dbgCreateElementMap(elMap);
    if (mpiRank == 0) {
      int writePartMesh = 1;
      GET_PARAMETER(0, "dbg->write part mesh", "%d", &writePartMesh);

      if (writePartMesh > 0)
	writePartitioningMesh("part.vtu");
      else 
	MSG("Skip write part mesh!\n");
    }
#endif

    // === Create new global and local DOF numbering. ===

    createLocalGlobalNumbering();

    // === Create interior boundary information. ===

    createInteriorBoundaryInfo();

    // === Remove all macro elements that are not part of the rank partition. ===

    removeMacroElements();

    // === If in debug mode, make some tests. ===

#if (DEBUG != 0)
    MSG("AMDiS runs in debug mode, so make some test ...\n");
    dbgTestElementMap(elMap);
    dbgTestInteriorBoundary();
    dbgTestCommonDofs(true);
    MSG("Debug mode tests finished!\n");
#endif

    // === Reset all DOFAdmins of the mesh. ===

    updateDofAdmins();

    // === Create periodic dof mapping, if there are periodic boundaries. ===

    createPeriodicMap();

    // === Global refinements. ===

    int globalRefinement = 0;
    GET_PARAMETER(0, mesh->getName() + "->global refinements", "%d", &globalRefinement);

    if (globalRefinement > 0) {
      refineManager->globalRefine(mesh, globalRefinement);

      updateLocalGlobalNumbering();

      // === Update periodic mapping, if there are periodic boundaries. ===

      createPeriodicMap();
    }
  }


  void ParallelDomainBase::exitParallelization(AdaptInfo *adaptInfo)
  {}

  
  void ParallelDomainBase::updateDofAdmins()
  {
    FUNCNAME("ParallelDomainBase::updateDofAdmins()");

    for (int i = 0; i < mesh->getNumberOfDOFAdmin(); i++) {
      DOFAdmin& admin = const_cast<DOFAdmin&>(mesh->getDOFAdmin(i));

      // There must always be more allocated DOFs than used DOFs in the DOFAdmin.
      // Otherwise, it is not possible to define a first DOF hole.
      if (static_cast<unsigned int>(admin.getSize()) == mapLocalGlobalDOFs.size())
	admin.enlargeDOFLists();
      
      for (int j = 0; j < admin.getSize(); j++)
	admin.setDOFFree(j, true);
      for (unsigned int j = 0; j < mapLocalGlobalDOFs.size(); j++)
 	admin.setDOFFree(j, false);

      admin.setUsedSize(mapLocalGlobalDOFs.size());
      admin.setUsedCount(mapLocalGlobalDOFs.size());
      admin.setFirstHole(mapLocalGlobalDOFs.size());
    }
  }


  void ParallelDomainBase::testForMacroMesh()
  {
    FUNCNAME("ParallelDomainBase::testForMacroMesh()");

    int nMacroElements = 0;

    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, -1, Mesh::CALL_LEAF_EL);
    while (elInfo) {
      TEST_EXIT(elInfo->getLevel() == 0)
	("Mesh is already refined! This does not work with parallelization!\n");
      
      nMacroElements++;

      elInfo = stack.traverseNext(elInfo);
    }

    TEST_EXIT(nMacroElements >= mpiSize)
      ("The mesh has less macro elements than number of mpi processes!\n");
  }


  void ParallelDomainBase::setDofMatrix(DOFMatrix* mat, int dispMult, 
					int dispAddRow, int dispAddCol)
  {
    FUNCNAME("ParallelDomainBase::setDofMatrix()");

    TEST_EXIT(mat)("No DOFMatrix!\n");
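
    // A local matrix entry (row, col) of this component matrix is inserted into the
    // global PETSc matrix at position
    //   (globalRow * dispMult + dispAddRow, globalCol * dispMult + dispAddCol).
    // With the call setDofMatrix(mat, nComponents, i, j) made in fillPetscMatrix(),
    // this interleaves the components of every DOF.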

    using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits= mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    traits::col<Matrix>::type col(mat->getBaseMatrix());
    traits::const_value<Matrix>::type value(mat->getBaseMatrix());

    typedef traits::range_generator<row, Matrix>::type cursor_type;
    typedef traits::range_generator<nz, cursor_type>::type icursor_type;

    std::vector<int> cols;
    std::vector<double> values;
    cols.reserve(300);
    values.reserve(300);

    // === Traverse all rows of the dof matrix and insert row wise the values ===
    // === to the petsc matrix.                                               ===

    for (cursor_type cursor = begin<row>(mat->getBaseMatrix()), 
	   cend = end<row>(mat->getBaseMatrix()); cursor != cend; ++cursor) {

      cols.clear();
      values.clear();

      // Global index of the current row dof.
      int globalRowDof = mapLocalGlobalDOFs[*cursor];
      // Test if the current row dof is a periodic dof.
      bool periodicRow = (periodicDof.count(globalRowDof) > 0);


      // === Traverse all non zero entries of the row and produce vector cols ===
      // === with the column indices of all row entries and vector values     ===
      // === with the corresponding values.                                   ===

      for (icursor_type icursor = begin<nz>(cursor), icend = end<nz>(cursor); 
	   icursor != icend; ++icursor) {

	// Set only non-zero values.
	if (value(*icursor) != 0.0) {
	  // Global index of the current column index.
	  int globalColDof = mapLocalGlobalDOFs[col(*icursor)];
	  // Calculate the exact position of the column index in the petsc matrix.
	  int colIndex = globalColDof * dispMult + dispAddCol;

	  // If the current row is not periodic, but the current dof index is periodic,
	  // we have to duplicate the value to the other corresponding periodic columns.
 	  if (periodicRow == false && periodicDof.count(globalColDof) > 0) {
	    // The value is assigned to n matrix entries; therefore, every entry gets
	    // only 1/n of the original value.
	    double scalFactor = 1.0 / (periodicDof[globalColDof].size() + 1.0);

	    // Insert original entry.
 	    cols.push_back(colIndex);
 	    values.push_back(value(*icursor) * scalFactor);

	    // Insert the periodic entries.
 	    for (std::set<DegreeOfFreedom>::iterator it = 
		   periodicDof[globalColDof].begin();
 		 it != periodicDof[globalColDof].end(); ++it) {
 	      cols.push_back(*it * dispMult + dispAddCol);
 	      values.push_back(value(*icursor) * scalFactor);
	    }
 	  } else {
	    // Neither row nor column dof index is periodic, so simply add the entry.
	    cols.push_back(colIndex);
	    values.push_back(value(*icursor));
	  }
	}
      }


      // === Up to now we have assembled one row. Now, the row must be sent to the ===
      // === corresponding rows of the PETSc matrix.                               ===

      // Calculate petsc row index.
      int rowIndex = globalRowDof * dispMult + dispAddRow;
      
      if (periodicRow) {
	// The row dof is periodic, so the row must be sent to all corresponding periodic rows.

	double scalFactor = 1.0 / (periodicDof[globalRowDof].size() + 1.0);
	
	int diagIndex = -1;
	for (int i = 0; i < static_cast<int>(values.size()); i++) {
	  // Change only the non-diagonal values in the column. For the diagonal test
	  // we compare the global dof indices of the dof matrix (not of the PETSc
	  // matrix!).
	  if ((cols[i] - dispAddCol) / dispMult != globalRowDof)
	    values[i] *= scalFactor;
	  else
	    diagIndex = i;
	}
	
	// Send the main row to the petsc matrix.
	MatSetValues(petscMatrix, 1, &rowIndex, cols.size(), 
		     &(cols[0]), &(values[0]), ADD_VALUES);	
 
	// Set the diagonal element to zero, i.e., the diagonal element of the current
	// row is not sent to the periodic row indices.
	if (diagIndex != -1)
	  values[diagIndex] = 0.0;

	// Send the row to all periodic row indices.
	for (std::set<DegreeOfFreedom>::iterator it = periodicDof[globalRowDof].begin();
	     it != periodicDof[globalRowDof].end(); ++it) {
	  int perRowIndex = *it * dispMult + dispAddRow;
	  MatSetValues(petscMatrix, 1, &perRowIndex, cols.size(), 
		       &(cols[0]), &(values[0]), ADD_VALUES);
	}

      } else {
	// The row dof is not periodic, simply send the row to the petsc matrix.
	MatSetValues(petscMatrix, 1, &rowIndex, cols.size(), 
		     &(cols[0]), &(values[0]), ADD_VALUES);
      }    
    }
  }


  void ParallelDomainBase::setDofVector(Vec& petscVec, DOFVector<double>* vec, 
					int dispMult, int dispAdd)
  {
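    // As in setDofMatrix(), a local DOF is mapped to the PETSc vector index
    // globalRow * dispMult + dispAdd, which interleaves the solution components.
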
    // Traverse all used dofs in the dof vector.
    DOFVector<double>::Iterator dofIt(vec, USED_DOFS);
    for (dofIt.reset(); !dofIt.end(); ++dofIt) {
      // Calculate global row index of the dof.
      int globalRow = mapLocalGlobalDOFs[dofIt.getDOFIndex()];
      // Calculate petsc index of the row dof.
      int index = globalRow * dispMult + dispAdd;

      if (periodicDof.count(globalRow) > 0) {
	// The dof index is periodic, so distribute the value over all corresponding dof entries.

	double value = *dofIt / (periodicDof[globalRow].size() + 1.0);
	VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);

	for (std::set<DegreeOfFreedom>::iterator it = periodicDof[globalRow].begin();
	     it != periodicDof[globalRow].end(); ++it) {
	  index = *it * dispMult + dispAdd;
	  VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);
	}

      } else {
	// The dof index is not periodic.
	double value = *dofIt;
	VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);
      }
    }    
  }


  void ParallelDomainBase::fillPetscMatrix(Matrix<DOFMatrix*> *mat, SystemVector *vec)
  {
    FUNCNAME("ParallelDomainBase::fillPetscMatrix()");

    clock_t first = clock();
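
    // The assembly proceeds in several steps: create the distributed PETSc vectors,
    // count for every rank-owned row the number of nonzeros in the diagonal and
    // off-diagonal block (d_nnz / o_nnz), exchange the entries of rows owned by
    // other ranks, create the preallocated MPIAIJ matrix, and finally insert all
    // values via setDofMatrix() and setDofVector().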

    VecCreate(PETSC_COMM_WORLD, &petscRhsVec);
    VecSetSizes(petscRhsVec, nRankRows, nOverallRows);
    VecSetType(petscRhsVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscSolVec);
    VecSetSizes(petscSolVec, nRankRows, nOverallRows);
    VecSetType(petscSolVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscTmpVec);
    VecSetSizes(petscTmpVec, nRankRows, nOverallRows);
    VecSetType(petscTmpVec, VECMPI);

    using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits= mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    int d_nnz[nRankRows];
    int o_nnz[nRankRows];

    std::map<int, std::vector< std::pair<int, int> > > sendMatrixEntry;

    for (int i = 0; i < nRankRows; i++) {
      d_nnz[i] = 0;
      o_nnz[i] = 0;
    }

    for (int i = 0; i < nComponents; i++) {
      for (int j = 0; j < nComponents; j++) {
 	if ((*mat)[i][j]) {
	  Matrix bmat = (*mat)[i][j]->getBaseMatrix();

	  traits::col<Matrix>::type col(bmat);
	  traits::const_value<Matrix>::type value(bmat);
	  
	  typedef traits::range_generator<row, Matrix>::type cursor_type;
	  typedef traits::range_generator<nz, cursor_type>::type icursor_type;
	  
	  for (cursor_type cursor = begin<row>(bmat), 
		 cend = end<row>(bmat); cursor != cend; ++cursor) {

	    int r = mapLocalGlobalDOFs[*cursor] * nComponents + i;

	    if (isRankDof[*cursor]) {
	      r -= rstart * nComponents;

#if (DEBUG != 0)    
	      if (r < 0 || r >= nRankRows) {
		std::cout << "ERROR in rank: " << mpiRank << std::endl;
		std::cout << "  Wrong r = " << r << " " << *cursor << " " 
			  << mapLocalGlobalDOFs[*cursor] << " " 
			  << nRankRows << std::endl;
		ERROR_EXIT("Should not happen!\n");
	      }
#endif
	      
	      for (icursor_type icursor = begin<nz>(cursor), 
		     icend = end<nz>(cursor); icursor != icend; ++icursor) {
		if (value(*icursor) != 0.0) {
		  int c = mapLocalGlobalDOFs[col(*icursor)] * nComponents + j;

		  if (c >= rstart * nComponents && 
		      c < rstart * nComponents + nRankRows)
		    d_nnz[r]++;
		  else
		    o_nnz[r]++;		  
		}    
	      }
	    } else {
	      int sendToRank = -1;

	      for (RankToDofContainer::iterator it = recvDofs.begin();
		   it != recvDofs.end(); ++it) {
		for (DofContainer::iterator dofIt = it->second.begin();
		     dofIt != it->second.end(); ++dofIt) {
		  if (**dofIt == *cursor) {
		    sendToRank = it->first;
		    break;
		  }
		}

		if (sendToRank != -1)
		  break;
	      }

	      TEST_EXIT_DBG(sendToRank != -1)("Should not happen!\n");

	      for (icursor_type icursor = begin<nz>(cursor), 
		     icend = end<nz>(cursor); icursor != icend; ++icursor) {
		if (value(*icursor) != 0.0) {
		  int c = mapLocalGlobalDOFs[col(*icursor)] * nComponents + j;
		  
		  sendMatrixEntry[sendToRank].push_back(std::make_pair(r, c));
		}
	      }
	    }
	  }
	}
      }
    }

    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;

    std::vector<int*> sendBuffers;
    sendBuffers.reserve(recvDofs.size());

    for (RankToDofContainer::iterator it = recvDofs.begin(); 
	 it != recvDofs.end(); ++it) {
      int nSend = sendMatrixEntry[it->first].size();

      request[requestCounter++] = mpiComm.Isend(&nSend, 1, MPI_INT, it->first, 0);
      
      if (nSend > 0) {
	int *sendbuf = new int[nSend * 2];
	for (int i = 0; i < nSend; i++) {
	  sendbuf[i * 2] = sendMatrixEntry[it->first][i].first;
	  sendbuf[i * 2 + 1] = sendMatrixEntry[it->first][i].second;
	}
	sendBuffers.push_back(sendbuf);
      } else {
	sendBuffers.push_back(NULL);
      }
    }

    std::vector<int> recvSize;
    recvSize.reserve(sendDofs.size());
    
    int i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it)
      request[requestCounter++] = 
	mpiComm.Irecv(&(recvSize[i++]), 1, MPI_INT, it->first, 0);

    MPI::Request::Waitall(requestCounter, request);

    requestCounter = 0;

    i = 0;
    for (RankToDofContainer::iterator it = recvDofs.begin(); 
	 it != recvDofs.end(); ++it) {
      int nSend = sendMatrixEntry[it->first].size();

      if (nSend > 0)
	request[requestCounter++] = 
	  mpiComm.Isend(sendBuffers[i], nSend * 2, MPI_INT, it->first, 0);

      i++;
    }

    std::vector<int*> recvBuffers;
    recvBuffers.reserve(sendDofs.size());
    
    i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it) {
      if (recvSize[i] > 0) {
	int *recvbuf = new int[recvSize[i] * 2];
	recvBuffers.push_back(recvbuf);

	request[requestCounter++] =
	  mpiComm.Irecv(recvbuf, recvSize[i] * 2, MPI_INT, it->first, 0);
      } else {
	recvBuffers.push_back(NULL);
      }

      i++;
    }

    MPI::Request::Waitall(requestCounter, request);

    for (int j = 0; j < static_cast<int>(sendBuffers.size()); j++)
      if (sendBuffers[j])
 	delete [] sendBuffers[j];

    i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it) {
      if (recvSize[i] > 0) {
	for (int j = 0; j < recvSize[i]; j++) {
	  int r = recvBuffers[i][j * 2];
	  int c = recvBuffers[i][j * 2 + 1];

	  r -= rstart * nComponents;

	  TEST_EXIT_DBG(r >= 0 && r < nRankRows)("Should not happen!\n");
	  
	  if (c < rstart * nComponents || c >= rstart * nComponents + nRankRows)
	    o_nnz[r]++;
	  else
	    d_nnz[r]++;
	}

	delete [] recvBuffers[i];
      }

      i++;
    }

    MatCreateMPIAIJ(PETSC_COMM_WORLD, nRankRows, nRankRows, nOverallRows, nOverallRows,
		    0, d_nnz, 0, o_nnz, &petscMatrix);

#if (DEBUG != 0)
    int a, b;
    MatGetOwnershipRange(petscMatrix, &a, &b);
    TEST_EXIT(a == rstart * nComponents)("Wrong matrix ownership range!\n");
    TEST_EXIT(b == rstart * nComponents + nRankRows)("Wrong matrix ownership range!\n");
#endif

    using mtl::tag::major; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits= mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    for (int i = 0; i < nComponents; i++)
      for (int j = 0; j < nComponents; j++)
	if ((*mat)[i][j])
	  setDofMatrix((*mat)[i][j], nComponents, i, j);

    MatAssemblyBegin(petscMatrix, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(petscMatrix, MAT_FINAL_ASSEMBLY);

    for (int i = 0; i < nComponents; i++)
      setDofVector(petscRhsVec, vec->getDOFVector(i), nComponents, i);

    VecAssemblyBegin(petscRhsVec);
    VecAssemblyEnd(petscRhsVec);

    INFO(info, 8)("Fill petsc matrix needed %.5f seconds\n", TIME_USED(first, clock()));
  }


  void ParallelDomainBase::solvePetscMatrix(SystemVector &vec)
  {
    FUNCNAME("ParallelDomainBase::solvePetscMatrix()");

#if 0
    // Set the old solution to be the initial guess for the PETSc solver.
    for (int i = 0; i < nComponents; i++)
      setDofVector(petscSolVec, vec->getDOFVector(i), nComponents, i);

    VecAssemblyBegin(petscSolVec);
    VecAssemblyEnd(petscSolVec);
#endif

    // === Init Petsc solver. ===

    KSP solver;
    KSPCreate(PETSC_COMM_WORLD, &solver);
    KSPSetOperators(solver, petscMatrix, petscMatrix, SAME_NONZERO_PATTERN); 
    KSPSetTolerances(solver, 0.0, 1e-8, PETSC_DEFAULT, PETSC_DEFAULT);
    KSPSetType(solver, KSPBCGS);
    KSPMonitorSet(solver, myKSPMonitor, PETSC_NULL, 0);
    KSPSetFromOptions(solver);
    // Do not delete the solution vector, use it for the initial guess.
    //    KSPSetInitialGuessNonzero(solver, PETSC_TRUE);


    // === Run Petsc. ===

    KSPSolve(solver, petscRhsVec, petscSolVec);

    // === Transfer values from PETSc's solution vector to the dof vectors.
    PetscScalar *vecPointer;
    VecGetArray(petscSolVec, &vecPointer);
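
    // The local part of the PETSc solution vector stores the components interleaved:
    // entry j * nComponents + i belongs to component i of the j-th rank-owned DOF.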

    for (int i = 0; i < nComponents; i++) {
      DOFVector<double> *dofvec = vec.getDOFVector(i);
      for (int j = 0; j < nRankDofs; j++)
	(*dofvec)[mapLocalToDofIndex[j]] = vecPointer[j * nComponents + i];      
    }

    VecRestoreArray(petscSolVec, &vecPointer);


    // === Synchronize dofs at common dofs, i.e., dofs that correspond to more ===
    // === than one partition.                                                 ===
    synchVectors(vec);


    // === Print information about solution process. ===

    int iterations = 0;
    KSPGetIterationNumber(solver, &iterations);
    MSG("  Number of iterations: %d\n", iterations);
    
    double norm = 0.0;
    MatMult(petscMatrix, petscSolVec, petscTmpVec);
    VecAXPY(petscTmpVec, -1.0, petscRhsVec);
    VecNorm(petscTmpVec, NORM_2, &norm);
    MSG("  Residual norm: %e\n", norm);


    // === Destroy Petsc's variables. ===

    MatDestroy(petscMatrix);
    VecDestroy(petscRhsVec);
    VecDestroy(petscSolVec);
    VecDestroy(petscTmpVec);
    KSPDestroy(solver);
  }
  
  void ParallelDomainBase::synchVectors(SystemVector &vec)
  {
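    // Values of rank-owned DOFs on interior boundaries (sendDofs) are sent to the
    // neighbouring ranks, while values of DOFs owned by other ranks (recvDofs) are
    // received and copied back into the DOF vectors, component by component.
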
#if 0
    StdMpi<std::vector<double>, std::vector<double> > stdMpi(mpiComm);
    stdMpi.prepareCommunication(false);

    for (RankToDofContainer::iterator sendIt = sendDofs.begin();
	 sendIt != sendDofs.end(); ++sendIt, i++) {
      std::vector<double> dofs;
      int nSendDOFs = sendIt->second.size();
      dofs.reserve(nComponents * sendIt->second.size());
      
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
	for (int k = 0; k < nSendDOFs; k++)
	  dofs.push_back((*dofvec)[*((sendIt->second)[k])]);
      }

      stdMpi.send(sendIt->first, dofs);
    }

    stdMpi.startCommunication<int>();
#endif

#if 1

    std::vector<double*> sendBuffers(sendDofs.size());
    std::vector<double*> recvBuffers(recvDofs.size());

    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;
    
    int i = 0;
    for (RankToDofContainer::iterator sendIt = sendDofs.begin();
	 sendIt != sendDofs.end(); ++sendIt, i++) {
      int nSendDOFs = sendIt->second.size();
      sendBuffers[i] = new double[nSendDOFs * nComponents];

      int counter = 0;
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
	for (int k = 0; k < nSendDOFs; k++)
	  sendBuffers[i][counter++] = (*dofvec)[*((sendIt->second)[k])];
      }

      request[requestCounter++] = mpiComm.Isend(sendBuffers[i], nSendDOFs * nComponents,
						MPI_DOUBLE, sendIt->first, 0);
    }

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size() * nComponents;
      recvBuffers[i] = new double[nRecvDOFs];

      request[requestCounter++] =
	mpiComm.Irecv(recvBuffers[i], nRecvDOFs, MPI_DOUBLE, recvIt->first, 0);
    }

    MPI::Request::Waitall(requestCounter, request);

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size();

      int counter = 0;
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
 	for (int k = 0; k < nRecvDOFs; k++)
	  (*dofvec)[*(recvIt->second)[k]] = recvBuffers[i][counter++];
      }

      delete [] recvBuffers[i];
    }
    
    for (int i = 0; i < static_cast<int>(sendBuffers.size()); i++)
      delete [] sendBuffers[i];
#endif
  }


  void ParallelDomainBase::checkMeshChange()
  {
    FUNCNAME("ParallelDomainBase::checkMeshChange()");

    // === If the mesh has not been changed on any rank, return. ===

    int recvAllValues = 0;
    int sendValue = static_cast<int>(mesh->getChangeIndex() != lastMeshChangeIndex);
    mpiComm.Allreduce(&sendValue, &recvAllValues, 1, MPI_INT, MPI_SUM);

    if (recvAllValues == 0)
      return;

    // === At least one rank's mesh has been changed, so the boundaries must be ===
    // === adapted to the new mesh structure.                                 ===

    clock_t first = clock();
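
    // The boundary check is repeated because fitting the mesh along one interior
    // boundary may trigger refinements that affect other boundaries. The loop stops
    // only when no rank reports a change anymore.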
    
    do {
      // To check the interior boundaries, the ownership of the boundaries is not 
      // important. Therefore, we add all boundaries to one boundary container.
      RankToBoundMap allBound = myIntBoundary.boundary;
      for (RankToBoundMap::iterator it = otherIntBoundary.boundary.begin(); 
	   it != otherIntBoundary.boundary.end(); ++it)
	allBound[it->first] = it->second;

      // === Check the boundaries and adapt mesh if necessary. ===

      bool meshChanged = checkAndAdaptBoundary(allBound);

      // === Check on all ranks if at least one rank's mesh has changed. ===

      int sendValue = static_cast<int>(!meshChanged);
      recvAllValues = 0;
      mpiComm.Allreduce(&sendValue, &recvAllValues, 1, MPI_INT, MPI_SUM);
    } while (recvAllValues != 0);


#if (DEBUG != 0)
    writeMesh(feSpace);
#endif

    INFO(info, 8)("Parallel mesh adaption needed %.5f seconds\n", 
		  TIME_USED(first, clock()));

    // === Because the mesh has been changed, update the DOF numbering and mappings. ===
   
    updateLocalGlobalNumbering();
  }

  
  bool ParallelDomainBase::checkAndAdaptBoundary(RankToBoundMap &allBound)
  {
    FUNCNAME("ParallelDomainBase::checkAndAdaptBoundary()");

    // === Create mesh structure codes for all of the rank's boundary elements. ===
       
    std::map<int, MeshCodeVec> sendCodes;
   
    for (RankToBoundMap::iterator it = allBound.begin(); it != allBound.end(); ++it) {
      for (std::vector<AtomicBoundary>::iterator boundIt = it->second.begin();
	   boundIt != it->second.end(); ++boundIt) {
	MeshStructure elCode;
	elCode.init(boundIt->rankObj);
	sendCodes[it->first].push_back(elCode);
      }
    }

    StdMpi<MeshCodeVec, MeshCodeVec> stdMpi(mpiComm);
    stdMpi.prepareCommunication(true);
    stdMpi.send(sendCodes);
    stdMpi.recv(allBound);
    stdMpi.startCommunication<unsigned long int>(MPI_UNSIGNED_LONG);
 
    // === Compare received mesh structure codes. ===
    
    bool meshFitTogether = true;

    for (RankToBoundMap::iterator it = allBound.begin(); it != allBound.end(); ++it) {
      
      MeshCodeVec &recvCodes = stdMpi.getRecvData()[it->first];
      int i = 0;
      
      for (std::vector<AtomicBoundary>::iterator boundIt = it->second.begin();
	   boundIt != it->second.end(); ++boundIt) {
	
	MeshStructure elCode;	
	elCode.init(boundIt->rankObj);
	
	if (elCode.getCode() != recvCodes[i].getCode()) {
	  TEST_EXIT_DBG(refineManager)("Refinement manager is not set correctly!\n");
	  
	  bool b = fitElementToMeshCode(refineManager, 
					recvCodes[i], 
					boundIt->rankObj.el,
					boundIt->rankObj.ithObj, 
					boundIt->rankObj.elType);  
	  if (b)
	    meshFitTogether = false;
	}
	
	i++;
      }
    }

    return meshFitTogether;
  }
  
  
  void ParallelDomainBase::serialize(std::ostream &out, DofContainer &data)
  {
    int vecSize = data.size();
    SerUtil::serialize(out, vecSize);
    for (int i = 0; i < vecSize; i++) {
      int dofIndex = (*data[i]);
      SerUtil::serialize(out, dofIndex);
    }
  }


  void ParallelDomainBase::deserialize(std::istream &in, DofContainer &data,
				       std::map<int, const DegreeOfFreedom*> &dofMap)
  {
    FUNCNAME("ParallelDomainBase::deserialize()");

    int vecSize = 0;
    SerUtil::deserialize(in, vecSize);
    data.resize(vecSize);
    for (int i = 0; i < vecSize; i++) {
      int dofIndex = 0;
      SerUtil::deserialize(in, dofIndex);

      TEST_EXIT_DBG(dofMap.count(dofIndex) != 0)
	("Dof index could not be deserialized correctly!\n");

      data[i] = dofMap[dofIndex];
    }
  }


  void ParallelDomainBase::serialize(std::ostream &out, RankToDofContainer &data)
  {
    int mapSize = data.size();
    SerUtil::serialize(out, mapSize);
    for (RankToDofContainer::iterator it = data.begin(); it != data.end(); ++it) {
      int rank = it->first;
      SerUtil::serialize(out, rank);
      serialize(out, it->second);
    }
  }

  
  void ParallelDomainBase::deserialize(std::istream &in, RankToDofContainer &data,
				       std::map<int, const DegreeOfFreedom*> &dofMap)
  {
    int mapSize = 0;
    SerUtil::deserialize(in, mapSize);
    for (int i = 0; i < mapSize; i++) {
      int rank = 0;
      SerUtil::deserialize(in, rank);
      deserialize(in, data[rank], dofMap);      
    }
  }


  double ParallelDomainBase::setElemWeights(AdaptInfo *adaptInfo) 
  {
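    // Each macro element is weighted by the number of its leaf elements that belong
    // to the rank's partition; the sum of all local weights is returned.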
    double localWeightSum = 0.0;
    int elNum = -1;

    elemWeights.clear();

    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, -1, Mesh::CALL_EVERY_EL_PREORDER);
    while (elInfo) {
      Element *element = elInfo->getElement();

      // get partition data
      PartitionElementData *partitionData = dynamic_cast<PartitionElementData*>
	(element->getElementData(PARTITION_ED));

      if (partitionData && partitionData->getPartitionStatus() == IN) {
	if (partitionData->getLevel() == 0)
	  elNum = element->getIndex();
	
	TEST_EXIT_DBG(elNum != -1)("invalid element number\n");
	if (element->isLeaf()) {
	  elemWeights[elNum] += 1.0;
	  localWeightSum += 1.0;
	}
      }

      elInfo = stack.traverseNext(elInfo);
    }

    return localWeightSum;
  }


  void ParallelDomainBase::partitionMesh(AdaptInfo *adaptInfo)
  {
    FUNCNAME("ParallelDomainBase::partitionMesh()");

    if (initialPartitionMesh) {
      initialPartitionMesh = false;
      partitioner->fillCoarsePartitionVec(&oldPartitionVec);
      partitioner->partition(&elemWeights, INITIAL);
    } else {
      oldPartitionVec = partitionVec;
      partitioner->partition(&elemWeights, ADAPTIVE_REPART, 100.0 /*0.000001*/);
    }    

    partitioner->fillCoarsePartitionVec(&partitionVec);
  }


  void ParallelDomainBase::createInteriorBoundaryInfo()
  {
    FUNCNAME("ParallelDomainBase::createInteriorBoundaryInfo()");

    int nNeighbours = mesh->getGeo(NEIGH);
    int dim = mesh->getDim();
    GeoIndex subObj = CENTER;
    switch (dim) {
    case 2:
      subObj = EDGE;
      break;
    case 3:
      subObj = FACE;
      break;
    default:
      ERROR_EXIT("What is this?\n");
    }     

    // === First, traverse the mesh and search for all elements that have a      ===
    // === boundary with an element in another partition.                        ===

    TraverseStack stack;
    ElInfo *elInfo = 
      stack.traverseFirst(mesh, -1, 
			  Mesh::CALL_LEAF_EL | Mesh::FILL_NEIGH | Mesh::FILL_BOUND);
    while (elInfo) {
      Element *element = elInfo->getElement();

      PartitionElementData *partitionData = 
	dynamic_cast<PartitionElementData*>(element->getElementData(PARTITION_ED));   

      // Check whether the element is within the rank's partition.
      if (partitionData->getPartitionStatus() == IN) {
	for (int i = 0; i < nNeighbours; i++) {
	  if (!elInfo->getNeighbour(i))
	    continue;

	  PartitionElementData *neighbourPartitionData =
	    dynamic_cast<PartitionElementData*>(elInfo->getNeighbour(i)->
						getElementData(PARTITION_ED));

 	  if (neighbourPartitionData->getPartitionStatus() == OUT) {

	    // We have found an element that is in rank's partition, but has a 
	    // neighbour outside of the rank's partition.

	    // === Create information about the boundary between the two elements. ===

	    AtomicBoundary bound;	    	    
	    bound.rankObj.el = element;
	    bound.rankObj.elIndex = element->getIndex();
	    bound.rankObj.elType = elInfo->getType();
	    bound.rankObj.subObj = subObj;
	    bound.rankObj.ithObj = i;

	    bound.neighObj.el = elInfo->getNeighbour(i);
	    bound.neighObj.elIndex = elInfo->getNeighbour(i)->getIndex();
	    bound.neighObj.elType = -1;
	    bound.neighObj.subObj = subObj;
	    bound.neighObj.ithObj = elInfo->getSideOfNeighbour(i);
	    
	    if (dim == 2) {
	      // i == 2  =>  getSideOfNeighbour(i) == 2
	      TEST_EXIT_DBG(i != 2 || elInfo->getSideOfNeighbour(i) == 2)
		("Should not happen!\n");
	    }

	    // Get rank number of the neighbouring element.
	    int otherElementRank = partitionVec[elInfo->getNeighbour(i)->getIndex()];


	    // === Add the boundary information object to the corresponding overall ===
	    // === boundary. There are three possibilities: if the boundary is a    ===
	    // === periodic boundary, just add it to \ref periodicBoundary. Here it  ===
	    // === does not matter which rank is responsible for this boundary.     ===
	    // === Otherwise, the boundary is added either to \ref myIntBoundary or ===
	    // === to \ref otherIntBoundary, depending on which rank is responsible  ===
	    // === for this boundary.                                                ===

	    if (BoundaryManager::isBoundaryPeriodic(elInfo->getBoundary(subObj, i))) {	      
	      // We have found an element that is at an interior, periodic boundary.
	      AtomicBoundary& b = periodicBoundary.getNewAtomic(otherElementRank);
	      b = bound;
	    } else {
	      // We have found an element that is at an interior, non-periodic boundary.
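	      // The rank with the higher number becomes the owner of this boundary and
	      // stores it in myIntBoundary; the other rank stores it in otherIntBoundary.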
	      if (mpiRank > otherElementRank) {
		AtomicBoundary& b = myIntBoundary.getNewAtomic(otherElementRank);
		b = bound;
		b.rankObj.setReverseMode(b.neighObj, feSpace);
	      } else {
		AtomicBoundary& b = otherIntBoundary.getNewAtomic(otherElementRank);
		b = bound;	 
		b.neighObj.setReverseMode(b.rankObj, feSpace);
	      }	      
	    }
 	  }
	}
      }

      elInfo = stack.traverseNext(elInfo);
    }

    // === Once we have this information, we must take care of the order of the      ===
    // === atomic boundaries in the three boundary handling objects. Eventually, all ===
    // === the boundaries have to be in the same order on both ranks that share the  ===
    // === boundary.                                                                 ===

    std::vector<int*> sendBuffers, recvBuffers;

    // First we communicate myInt/otherIntBoundary, and then the periodic boundaries.
    MPI::Request request[max(myIntBoundary.boundary.size() + 
			     otherIntBoundary.boundary.size(),
			     periodicBoundary.boundary.size())];
    int requestCounter = 0;


    // === The owners of the boundaries send their atomic boundary order to the ===
    // === neighbouring ranks.                                                 ===

    for (RankToBoundMap::iterator rankIt = myIntBoundary.boundary.begin();
	 rankIt != myIntBoundary.boundary.end(); ++rankIt) {    
      int nSendInt = rankIt->second.size();
      int* buffer = new int[nSendInt * 2];
      for (int i = 0; i < nSendInt; i++) {
	buffer[i * 2] = (rankIt->second)[i].rankObj.elIndex;
	buffer[i * 2 + 1] = (rankIt->second)[i].rankObj.ithObj;
      }
      sendBuffers