// ParallelDomainBase.cc

#include <algorithm>

#include "ParallelDomainBase.h"
#include "ParMetisPartitioner.h"
#include "Mesh.h"
#include "Traverse.h"
#include "ElInfo.h"
#include "Element.h"
#include "MacroElement.h"
#include "PartitionElementData.h"
#include "DOFMatrix.h"
#include "DOFVector.h"
#include "SystemVector.h"
#include "VtkWriter.h"
#include "ElementDofIterator.h"
#include "ProblemStatBase.h"
#include "StandardProblemIteration.h"
#include "ElementFileWriter.h"
#include "VertexVector.h"
#include "StdMpi.h"
#include "MeshStructure.h"

#include "petscksp.h"

namespace AMDiS {

  // KSP monitor callback registered in solvePetscMatrix(): rank 0 prints the
  // residual norm every 100th PETSc iteration.
  PetscErrorCode myKSPMonitor(KSP ksp, PetscInt iter, PetscReal rnorm, void *)
  {
    if (iter % 100 == 0 && MPI::COMM_WORLD.Get_rank() == 0)
      std::cout << "  Petsc-Iteration " << iter << ": " << rnorm << std::endl;

    return 0;
  }

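  // Compares two dof pointers by the dof indices they point to.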
  inline bool cmpDofsByValue(const DegreeOfFreedom* dof1, const DegreeOfFreedom* dof2)
  {
    return (*dof1 < *dof2);
  }

  ParallelDomainBase::ParallelDomainBase(ProblemIterationInterface *iIF,
					 ProblemTimeInterface *tIF,
					 FiniteElemSpace *fe,
					 RefinementManager *refinementManager)
    : iterationIF(iIF),
      timeIF(tIF),
      name(iIF->getName()),
      feSpace(fe),
      mesh(fe->getMesh()),
      refineManager(refinementManager),
      initialPartitionMesh(true),
      nRankDofs(0),
      rstart(0),
      nComponents(1),
      deserialized(false),
      lastMeshChangeIndex(0)
  {
    FUNCNAME("ParallelDomainBase::ParallelDomainBase()");

    TEST_EXIT(mesh->getNumberOfDOFAdmin() == 1)
      ("Only meshes with one DOFAdmin are supported!\n");
    TEST_EXIT(mesh->getDOFAdmin(0).getNumberOfPreDOFs(0) == 0)
      ("Wrong pre dof number for DOFAdmin!\n");

    mpiRank = MPI::COMM_WORLD.Get_rank();
    mpiSize = MPI::COMM_WORLD.Get_size();
    mpiComm = MPI::COMM_WORLD;
    partitioner = new ParMetisPartitioner(mesh, &mpiComm);
  }

  void ParallelDomainBase::initParallelization(AdaptInfo *adaptInfo)
  {
    FUNCNAME("ParallelDomainBase::initParallelization()");

    TEST_EXIT(mpiSize > 1)
      ("Parallelization does not work with only one process!\n");

    // If the problem has already been read from a file, there is nothing to do.
    if (deserialized)
      return;

    // Test if the mesh is the macro mesh only! Partitioning of the mesh is supported
    // only for macro meshes, so it will not work yet if the mesh is already refined
    // in some way.
    testForMacroMesh();

86
87
88
89
90
91
92
    // create an initial partitioning of the mesh
    partitioner->createPartitionData();
    // set the element weights, which are 1 at the very first begin
    setElemWeights(adaptInfo);
    // and now partition the mesh
    partitionMesh(adaptInfo);   

#if (DEBUG != 0)
    ElementIdxToDofs elMap;
    dbgCreateElementMap(elMap);

    if (mpiRank == 0)
      writePartitioningMesh("part.vtu");
#endif

    // === Create new global and local DOF numbering. ===

    createLocalGlobalNumbering();

    // === Create interior boundary information. ===

    createInteriorBoundaryInfo();

    // === Remove all macro elements that are not part of the rank partition. ===

    removeMacroElements();

#if (DEBUG != 0)
    dbgTestElementMap(elMap);
    dbgTestInteriorBoundary();
    dbgTestCommonDofs(true);
#endif

    // === Reset all DOFAdmins of the mesh. ===

    updateDofAdmins();

    // === Create periodic dof mapping, if there are periodic boundaries. ===

    createPeriodicMap();

    // === Global refinements. ===

    int globalRefinement = 0;
    GET_PARAMETER(0, mesh->getName() + "->global refinements", "%d", &globalRefinement);

    if (globalRefinement > 0) {
      refineManager->globalRefine(mesh, globalRefinement);

      updateLocalGlobalNumbering();

      // === Update periodic mapping, if there are periodic boundaries. ===

      createPeriodicMap();
    }
  }

  void ParallelDomainBase::exitParallelization(AdaptInfo *adaptInfo)
  {}


  // Brings all DOFAdmins of the mesh into a state consistent with the parallel
  // dof numbering: the first mapLocalGlobalDOFs.size() dof indices are marked
  // as used, all remaining slots as free.
  void ParallelDomainBase::updateDofAdmins()
  {
    int nAdmins = mesh->getNumberOfDOFAdmin();
    for (int i = 0; i < nAdmins; i++) {
      DOFAdmin& admin = const_cast<DOFAdmin&>(mesh->getDOFAdmin(i));
      
      for (int j = 0; j < admin.getSize(); j++)
	admin.setDOFFree(j, true);
      for (int j = 0; j < static_cast<int>(mapLocalGlobalDOFs.size()); j++)
 	admin.setDOFFree(j, false);

      admin.setUsedSize(mapLocalGlobalDOFs.size());
      admin.setUsedCount(mapLocalGlobalDOFs.size());
      admin.setFirstHole(mapLocalGlobalDOFs.size());
    }
  }

  void ParallelDomainBase::testForMacroMesh()
  {
    FUNCNAME("ParallelDomainBase::testForMacroMesh()");

    int nMacroElements = 0;

    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, -1, Mesh::CALL_LEAF_EL);
    while (elInfo) {
      TEST_EXIT(elInfo->getLevel() == 0)
	("Mesh is already refined! This does not work with parallelization!\n");

      nMacroElements++;

      elInfo = stack.traverseNext(elInfo);
    }

    TEST_EXIT(nMacroElements >= mpiSize)
      ("The mesh has less macro elements than number of mpi processes!\n");
  }

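  // Transfers one DOFMatrix into the global PETSc matrix. dispMult is the
  // number of components of the system; dispAddRow and dispAddCol are the
  // component offsets, i.e., global dof index d is mapped to the PETSc
  // row/column d * dispMult + dispAddRow (resp. dispAddCol). Entries in
  // periodic rows and columns are distributed to all associated periodic dofs.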
  void ParallelDomainBase::setDofMatrix(DOFMatrix* mat, int dispMult, 
					int dispAddRow, int dispAddCol)
  {
    FUNCNAME("ParallelDomainBase::setDofMatrix()");

    TEST_EXIT(mat)("No DOFMatrix!\n");

    using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits = mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    traits::col<Matrix>::type col(mat->getBaseMatrix());
    traits::const_value<Matrix>::type value(mat->getBaseMatrix());

    typedef traits::range_generator<row, Matrix>::type cursor_type;
    typedef traits::range_generator<nz, cursor_type>::type icursor_type;

    std::vector<int> cols;
    std::vector<double> values;
    cols.reserve(300);
    values.reserve(300);

    // === Traverse all rows of the dof matrix and insert the values row-wise ===
    // === into the PETSc matrix.                                             ===

    for (cursor_type cursor = begin<row>(mat->getBaseMatrix()), 
	   cend = end<row>(mat->getBaseMatrix()); cursor != cend; ++cursor) {

      cols.clear();
      values.clear();

      // Global index of the current row dof.
      int globalRowDof = mapLocalGlobalDOFs[*cursor];
      // Test if the current row dof is a periodic dof.
      bool periodicRow = (periodicDof.count(globalRowDof) > 0);

      // === Traverse all non-zero entries of the row and produce vector cols ===
      // === with the column indices of all row entries and vector values     ===
      // === with the corresponding values.                                   ===

      for (icursor_type icursor = begin<nz>(cursor), icend = end<nz>(cursor); 
	   icursor != icend; ++icursor) {

	// Set only non-zero values.
	if (value(*icursor) != 0.0) {
	  // Global index of the current column dof.
	  int globalColDof = mapLocalGlobalDOFs[col(*icursor)];
	  // Calculate the exact position of the column index in the PETSc matrix.
	  int colIndex = globalColDof * dispMult + dispAddCol;

	  // If the current row is not periodic, but the current dof index is periodic,
	  // we have to duplicate the value to the other corresponding periodic columns.
	  if (periodicRow == false && periodicDof.count(globalColDof) > 0) {
	    // The value is assigned to n matrix entries; therefore, every entry
	    // gets only 1/n of the original value.
	    double scalFactor = 1.0 / (periodicDof[globalColDof].size() + 1.0);

	    // Insert original entry.
	    cols.push_back(colIndex);
	    values.push_back(value(*icursor) * scalFactor);

	    // Insert the periodic entries.
	    for (std::set<DegreeOfFreedom>::iterator it = 
		   periodicDof[globalColDof].begin();
		 it != periodicDof[globalColDof].end(); ++it) {
	      cols.push_back(*it * dispMult + dispAddCol);
	      values.push_back(value(*icursor) * scalFactor);
	    }
	  } else {
	    // Neither the row nor the column dof index is periodic; simply add the entry.
	    cols.push_back(colIndex);
	    values.push_back(value(*icursor));
	  }
	}
      }

      // === Up to now we have assembled one row. Now the row must be sent to ===
      // === the corresponding rows of the PETSc matrix.                      ===

      // Calculate the PETSc row index.
      int rowIndex = globalRowDof * dispMult + dispAddRow;

      if (periodicRow) {
	// The row dof is periodic, so send the row to all corresponding periodic rows.
	double scalFactor = 1.0 / (periodicDof[globalRowDof].size() + 1.0);

	int diagIndex = -1;
	for (int i = 0; i < static_cast<int>(values.size()); i++) {
	  // Change only the non-diagonal values in the column. For the diagonal test
	  // we compare the global dof indices of the dof matrix (not of the PETSc
	  // matrix!).
	  if ((cols[i] - dispAddCol) / dispMult != globalRowDof)
	    values[i] *= scalFactor;
	  else
	    diagIndex = i;
	}
	
	// Send the main row to the PETSc matrix.
	MatSetValues(petscMatrix, 1, &rowIndex, cols.size(), 
		     &(cols[0]), &(values[0]), ADD_VALUES);

	// Set the diagonal element to zero, i.e., the diagonal element of the
	// current row is not sent to the periodic row indices.
	if (diagIndex != -1)
	  values[diagIndex] = 0.0;

	// Send the row to all periodic row indices.
	for (std::set<DegreeOfFreedom>::iterator it = periodicDof[globalRowDof].begin();
	     it != periodicDof[globalRowDof].end(); ++it) {
	  int perRowIndex = *it * dispMult + dispAddRow;
	  MatSetValues(petscMatrix, 1, &perRowIndex, cols.size(), 
		       &(cols[0]), &(values[0]), ADD_VALUES);
	}

      } else {
	// The row dof is not periodic; simply send the row to the PETSc matrix.
	MatSetValues(petscMatrix, 1, &rowIndex, cols.size(), 
		     &(cols[0]), &(values[0]), ADD_VALUES);
      }
    }
  }

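  // Transfers one DOFVector into the given PETSc vector; global dof index d is
  // mapped to the PETSc index d * dispMult + dispAdd. The value at a periodic
  // dof is distributed evenly to all dofs associated with it.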
  void ParallelDomainBase::setDofVector(Vec& petscVec, DOFVector<double>* vec, 
					int dispMult, int dispAdd)
  {
    // Traverse all used dofs in the dof vector.
    DOFVector<double>::Iterator dofIt(vec, USED_DOFS);
    for (dofIt.reset(); !dofIt.end(); ++dofIt) {
      // Calculate the global row index of the dof.
      int globalRow = mapLocalGlobalDOFs[dofIt.getDOFIndex()];
      // Calculate the PETSc index of the row dof.
      int index = globalRow * dispMult + dispAdd;

      if (periodicDof.count(globalRow) > 0) {
	// The dof index is periodic, so distribute the value to all dof entries.
	double value = *dofIt / (periodicDof[globalRow].size() + 1.0);

	VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);

	for (std::set<DegreeOfFreedom>::iterator it = periodicDof[globalRow].begin();
	     it != periodicDof[globalRow].end(); ++it) {
	  index = *it * dispMult + dispAdd;
	  VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);
	}

      } else {
	// The dof index is not periodic.
	double value = *dofIt;
	VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);
      }
    }
  }

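  // Creates the PETSc vectors and the PETSc matrix and assembles them from the
  // block of DOFMatrices and the SystemVector. The nonzero structure is counted
  // first, exchanging via MPI all entries that belong to rows owned by another
  // rank, so that the matrix can be preallocated exactly.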
  void ParallelDomainBase::fillPetscMatrix(Matrix<DOFMatrix*> *mat, SystemVector *vec)
  {
    FUNCNAME("ParallelDomainBase::fillPetscMatrix()");

    clock_t first = clock();

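    // Create the distributed PETSc vectors for the rhs, the solution and a
    // temporary vector; each rank owns nRankRows of the nOverallRows global rows.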
    VecCreate(PETSC_COMM_WORLD, &petscRhsVec);
    VecSetSizes(petscRhsVec, nRankRows, nOverallRows);
    VecSetType(petscRhsVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscSolVec);
    VecSetSizes(petscSolVec, nRankRows, nOverallRows);
    VecSetType(petscSolVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscTmpVec);
    VecSetSizes(petscTmpVec, nRankRows, nOverallRows);
    VecSetType(petscTmpVec, VECMPI);

    using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits = mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    // Number of nonzeros per local row within the diagonal block (d_nnz) and
    // within the off-diagonal blocks (o_nnz); needed to preallocate the PETSc
    // matrix exactly.
    int d_nnz[nRankRows];
    int o_nnz[nRankRows];

    // Matrix entries (row, col) that belong to rows owned by another rank and
    // therefore must be counted on that rank.
    std::map<int, std::vector<std::pair<int, int> > > sendMatrixEntry;

    for (int i = 0; i < nRankRows; i++) {
      d_nnz[i] = 0;
      o_nnz[i] = 0;
    }

    for (int i = 0; i < nComponents; i++) {
      for (int j = 0; j < nComponents; j++) {
	if ((*mat)[i][j]) {
	  Matrix bmat = (*mat)[i][j]->getBaseMatrix();

	  traits::col<Matrix>::type col(bmat);
	  traits::const_value<Matrix>::type value(bmat);
	  
	  typedef traits::range_generator<row, Matrix>::type cursor_type;
	  typedef traits::range_generator<nz, cursor_type>::type icursor_type;

	  for (cursor_type cursor = begin<row>(bmat), 
		 cend = end<row>(bmat); cursor != cend; ++cursor) {

	    int r = mapLocalGlobalDOFs[*cursor] * nComponents + i;

	    if (isRankDof[*cursor]) {
	      // The row is owned by this rank; shift to the local row index.
	      r -= rstart * nComponents;

#if (DEBUG != 0)    
	      if (r < 0 || r >= nRankRows) {
		std::cout << "ERROR in rank: " << mpiRank << std::endl;
		std::cout << "  Wrong r = " << r << " " << *cursor << " " 
			  << mapLocalGlobalDOFs[*cursor] << " " 
			  << nRankRows << std::endl;
		ERROR_EXIT("Should not happen!\n");
	      }
#endif

	      for (icursor_type icursor = begin<nz>(cursor), 
		     icend = end<nz>(cursor); icursor != icend; ++icursor) {
		if (value(*icursor) != 0.0) {
		  int c = mapLocalGlobalDOFs[col(*icursor)] * nComponents + j;

		  if (c >= rstart * nComponents && 
		      c < rstart * nComponents + nRankRows)
		    d_nnz[r]++;
		  else
		    o_nnz[r]++;
		}
	      }
	    } else {
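	      // The row dof is owned by another rank: find that rank by
	      // searching for the dof in the lists of dofs received from the
	      // other ranks.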
	      int sendToRank = -1;

	      for (RankToDofContainer::iterator it = recvDofs.begin();
		   it != recvDofs.end(); ++it) {
		for (DofContainer::iterator dofIt = it->second.begin();
		     dofIt != it->second.end(); ++dofIt) {
		  if (**dofIt == *cursor) {
		    sendToRank = it->first;
		    break;
		  }
		}

		if (sendToRank != -1)
		  break;
	      }

	      TEST_EXIT_DBG(sendToRank != -1)("Should not happen!\n");

	      for (icursor_type icursor = begin<nz>(cursor), 
		     icend = end<nz>(cursor); icursor != icend; ++icursor) {
		if (value(*icursor) != 0.0) {
		  int c = mapLocalGlobalDOFs[col(*icursor)] * nComponents + j;
		  
		  sendMatrixEntry[sendToRank].push_back(std::make_pair(r, c));
		}
	      }
	    }
	  }
	}
      }
    }
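
    // === Exchange with the neighbouring ranks the number of matrix entries ===
    // === that are assembled here for rows owned by them, followed by the   ===
    // === entries themselves.                                               ===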

    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;

    std::vector<int*> sendBuffers;
    sendBuffers.reserve(recvDofs.size());

    for (RankToDofContainer::iterator it = recvDofs.begin(); 
	 it != recvDofs.end(); ++it) {
      int nSend = sendMatrixEntry[it->first].size();

      request[requestCounter++] = mpiComm.Isend(&nSend, 1, MPI_INT, it->first, 0);
      
      if (nSend > 0) {
	int *sendbuf = new int[nSend * 2];
	for (int i = 0; i < nSend; i++) {
	  sendbuf[i * 2] = sendMatrixEntry[it->first][i].first;
	  sendbuf[i * 2 + 1] = sendMatrixEntry[it->first][i].second;
	}
	sendBuffers.push_back(sendbuf);
      } else {
	sendBuffers.push_back(NULL);
      }
    }

    std::vector<int> recvSize;
    // The vector must actually be sized (not just reserved), because the
    // Irecv below writes directly through &recvSize[i].
    recvSize.resize(sendDofs.size());
    
    int i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it)
      request[requestCounter++] = 
	mpiComm.Irecv(&(recvSize[i++]), 1, MPI_INT, it->first, 0);

    MPI::Request::Waitall(requestCounter, request);
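    // Now each rank knows how many index pairs it will receive from each of
    // its neighbouring ranks.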

    requestCounter = 0;

    i = 0;
    for (RankToDofContainer::iterator it = recvDofs.begin(); 
	 it != recvDofs.end(); ++it) {
      int nSend = sendMatrixEntry[it->first].size();

      if (nSend > 0)
	request[requestCounter++] = 
	  mpiComm.Isend(sendBuffers[i], nSend * 2, MPI_INT, it->first, 0);

      i++;
    }

    std::vector<int*> recvBuffers;
    recvBuffers.reserve(sendDofs.size());
    
    i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it) {
      if (recvSize[i] > 0) {
	int *recvbuf = new int[recvSize[i] * 2];
	recvBuffers.push_back(recvbuf);

	request[requestCounter++] =
	  mpiComm.Irecv(recvbuf, recvSize[i] * 2, MPI_INT, it->first, 0);
      } else {
	recvBuffers.push_back(NULL);
      }

      i++;
    }

    MPI::Request::Waitall(requestCounter, request);

    for (int j = 0; j < static_cast<int>(sendBuffers.size()); j++)
      if (sendBuffers[j])
	delete [] sendBuffers[j];

    i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it) {
      if (recvSize[i] > 0) {
	for (int j = 0; j < recvSize[i]; j++) {
	  int r = recvBuffers[i][j * 2];
	  int c = recvBuffers[i][j * 2 + 1];

	  r -= rstart * nComponents;

	  TEST_EXIT_DBG(r >= 0 && r < nRankRows)("Should not happen!\n");

	  if (c < rstart * nComponents || c >= rstart * nComponents + nRankRows)
	    o_nnz[r]++;
	  else
	    d_nnz[r]++;
	}

	delete [] recvBuffers[i];
      }

      i++;
    }

    // Create the distributed PETSc matrix with the exact preallocation
    // computed above.
    MatCreateMPIAIJ(PETSC_COMM_WORLD, nRankRows, nRankRows, nOverallRows, nOverallRows,
		    0, d_nnz, 0, o_nnz, &petscMatrix);

#if (DEBUG != 0)
    int a, b;
    MatGetOwnershipRange(petscMatrix, &a, &b);
    TEST_EXIT(a == rstart * nComponents)("Wrong matrix ownership range!\n");
    TEST_EXIT(b == rstart * nComponents + nRankRows)("Wrong matrix ownership range!\n");
#endif

    using mtl::tag::major; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits = mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    for (int i = 0; i < nComponents; i++)
      for (int j = 0; j < nComponents; j++)
	if ((*mat)[i][j])
	  setDofMatrix((*mat)[i][j], nComponents, i, j);

    MatAssemblyBegin(petscMatrix, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(petscMatrix, MAT_FINAL_ASSEMBLY);

    for (int i = 0; i < nComponents; i++)
      setDofVector(petscRhsVec, vec->getDOFVector(i), nComponents, i);

    VecAssemblyBegin(petscRhsVec);
    VecAssemblyEnd(petscRhsVec);

    INFO(info, 8)("Fill petsc matrix needed %.5f seconds\n", TIME_USED(first, clock()));
  }


  // Solves the assembled PETSc system, copies the solution back into vec,
  // synchronizes the dofs shared between ranks, and destroys the PETSc objects.
  void ParallelDomainBase::solvePetscMatrix(SystemVector &vec)
  {
    FUNCNAME("ParallelDomainBase::solvePetscMatrix()");

#if 0
    // Set the old solution to be the initial guess for the PETSc solver.
    for (int i = 0; i < nComponents; i++)
      setDofVector(petscSolVec, vec.getDOFVector(i), nComponents, i);

    VecAssemblyBegin(petscSolVec);
    VecAssemblyEnd(petscSolVec);
#endif

    // === Init PETSc solver. ===

    KSP solver;
    KSPCreate(PETSC_COMM_WORLD, &solver);
    KSPSetOperators(solver, petscMatrix, petscMatrix, SAME_NONZERO_PATTERN); 
    KSPSetTolerances(solver, 0.0, 1e-8, PETSC_DEFAULT, PETSC_DEFAULT);
    KSPSetType(solver, KSPBCGS);
    KSPMonitorSet(solver, myKSPMonitor, PETSC_NULL, 0);
    KSPSetFromOptions(solver);
    // Do not delete the solution vector, use it for the initial guess.
    //    KSPSetInitialGuessNonzero(solver, PETSC_TRUE);

    // === Run PETSc. ===

    KSPSolve(solver, petscRhsVec, petscSolVec);

    // === Transfer the values from PETSc's solution vector to the dof vectors. ===

    PetscScalar *vecPointer;
    VecGetArray(petscSolVec, &vecPointer);

    for (int i = 0; i < nComponents; i++) {
      DOFVector<double> *dofvec = vec.getDOFVector(i);
      for (int j = 0; j < nRankDofs; j++)
	(*dofvec)[mapLocalToDofIndex[j]] = vecPointer[j * nComponents + i];
    }

    VecRestoreArray(petscSolVec, &vecPointer);


    // === Synchronize dofs at common dofs, i.e., dofs that correspond to more ===
    // === than one partition.                                                 ===
    synchVectors(vec);


    // === Print information about solution process. ===

    int iterations = 0;
    KSPGetIterationNumber(solver, &iterations);
    MSG("  Number of iterations: %d\n", iterations);
    
    double norm = 0.0;
    MatMult(petscMatrix, petscSolVec, petscTmpVec);
    VecAXPY(petscTmpVec, -1.0, petscRhsVec);
    VecNorm(petscTmpVec, NORM_2, &norm);
    MSG("  Residual norm: %e\n", norm);


    // === Destroy Petsc's variables. ===

    MatDestroy(petscMatrix);
    VecDestroy(petscRhsVec);
    VecDestroy(petscSolVec);
    VecDestroy(petscTmpVec);
    KSPDestroy(solver);
  }
  
  // Synchronizes the dofs at the interior boundaries: every rank sends the
  // values of the dofs it owns to the neighbouring ranks, which overwrite
  // their local copies with the received values.
  void ParallelDomainBase::synchVectors(SystemVector &vec)
  {
#if 0
    StdMpi<std::vector<double>, std::vector<double> > stdMpi(mpiComm);
    stdMpi.prepareCommunication(false);

    for (RankToDofContainer::iterator sendIt = sendDofs.begin();
	 sendIt != sendDofs.end(); ++sendIt) {
      std::vector<double> dofs;
      int nSendDOFs = sendIt->second.size();
      dofs.reserve(nComponents * sendIt->second.size());
      
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
	for (int k = 0; k < nSendDOFs; k++)
	  dofs.push_back((*dofvec)[*((sendIt->second)[k])]);
      }

      stdMpi.send(sendIt->first, dofs);
    }

    stdMpi.startCommunication<int>();
#endif

#if 1
    std::vector<double*> sendBuffers(sendDofs.size());
    std::vector<double*> recvBuffers(recvDofs.size());

    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;
    
    int i = 0;
    for (RankToDofContainer::iterator sendIt = sendDofs.begin();
	 sendIt != sendDofs.end(); ++sendIt, i++) {
      int nSendDOFs = sendIt->second.size();
      sendBuffers[i] = new double[nSendDOFs * nComponents];

      int counter = 0;
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
	for (int k = 0; k < nSendDOFs; k++)
	  sendBuffers[i][counter++] = (*dofvec)[*((sendIt->second)[k])];
      }

      request[requestCounter++] = mpiComm.Isend(sendBuffers[i], nSendDOFs * nComponents,
						MPI_DOUBLE, sendIt->first, 0);
    }

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size() * nComponents;
      recvBuffers[i] = new double[nRecvDOFs];

      request[requestCounter++] =
	mpiComm.Irecv(recvBuffers[i], nRecvDOFs, MPI_DOUBLE, recvIt->first, 0);
    }


    MPI::Request::Waitall(requestCounter, request);

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size();

      int counter = 0;
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
	for (int k = 0; k < nRecvDOFs; k++)
	  (*dofvec)[*(recvIt->second)[k]] = recvBuffers[i][counter++];
      }

      delete [] recvBuffers[i];
    }
    
    for (int i = 0; i < static_cast<int>(sendBuffers.size()); i++)
      delete [] sendBuffers[i];
#endif
  }


  // Checks whether the mesh has been changed since the last call. If so, the
  // mesh structure codes of all boundary elements are exchanged with the
  // neighbouring ranks to check whether the meshes still fit together at the
  // interior boundaries.
  void ParallelDomainBase::checkMeshChange()
  {
    FUNCNAME("ParallelDomainBase::checkMeshChange()");

    // === If mesh has not been changed, return. ===

    if (mesh->getChangeIndex() == lastMeshChangeIndex)
      return;

    // === Create mesh structure codes for all of the rank's boundary elements. ===

    typedef std::vector<MeshStructure> MeshCodeVec;
    std::map<int, MeshCodeVec > sendCodes;

    for (RankToBoundMap::iterator it = myIntBoundary.boundary.begin();
	 it != myIntBoundary.boundary.end(); ++it) {    

      for (std::vector<AtomicBoundary>::iterator boundIt = it->second.begin();
	   boundIt != it->second.end(); ++boundIt) {

	MeshStructure elCode;
	elCode.init(boundIt->rankObj.el, boundIt->rankObj.ithObj, 
		    boundIt->rankObj.elType);
	sendCodes[it->first].push_back(elCode);
      }
    }

    StdMpi<MeshCodeVec, MeshCodeVec> stdMpi(mpiComm);
    stdMpi.prepareCommunication(true);
    stdMpi.send(sendCodes);

    for (RankToBoundMap::iterator it = otherIntBoundary.boundary.begin();
	 it != otherIntBoundary.boundary.end(); ++it)
      stdMpi.recv(it->first);

    stdMpi.startCommunication<unsigned long int>();


    // === Compare received mesh structure codes. ===

    bool meshFitTogether = true;

    for (RankToBoundMap::iterator it = otherIntBoundary.boundary.begin();
	 it != otherIntBoundary.boundary.end(); ++it) {

      MeshCodeVec &recvCodes = stdMpi.getRecvData()[it->first];
      int i = 0;

      for (std::vector<AtomicBoundary>::iterator boundIt = it->second.begin();
	   boundIt != it->second.end(); ++boundIt) {

	MeshStructure elCode;
	elCode.init(boundIt->rankObj.el, boundIt->rankObj.ithObj, 
		    boundIt->rankObj.elType);

	if (elCode.getCode() != recvCodes[i].getCode()) {
	  TEST_EXIT_DBG(refineManager)("Refinement manager is not set correctly!\n");
	  //	  recvCodes[i].reset();
// 	  fitElementToMeshCode(refineManager, recvCodes[i], boundIt->rankObj.el,
// 			       boundIt->rankObj.ithObj, boundIt->rankObj.elType);

	  meshFitTogether = false;
	}

	i++;
      }
    }

    if (!meshFitTogether) {
      std::cout << "MESH HAS BEEN CHANGED!" << std::endl;
      exit(0);    
    }

    updateLocalGlobalNumbering();
  }

  // Writes a DofContainer as the list of its dof indices.
  void ParallelDomainBase::serialize(std::ostream &out, DofContainer &data)
  {
    int vecSize = data.size();
    SerUtil::serialize(out, vecSize);
    for (int i = 0; i < vecSize; i++) {
      int dofIndex = (*data[i]);
      SerUtil::serialize(out, dofIndex);
    }
  }


  // Reads a DofContainer written by serialize(); dofMap translates the stored
  // dof indices back to dof pointers.
  void ParallelDomainBase::deserialize(std::istream &in, DofContainer &data,
				       std::map<int, const DegreeOfFreedom*> &dofMap)
  {
    FUNCNAME("ParallelDomainBase::deserialize()");

    int vecSize = 0;
    SerUtil::deserialize(in, vecSize);
    data.resize(vecSize);
    for (int i = 0; i < vecSize; i++) {
      int dofIndex = 0;
      SerUtil::deserialize(in, dofIndex);

      TEST_EXIT_DBG(dofMap.count(dofIndex) != 0)
	("Dof index could not be deserialized correctly!\n");

      data[i] = dofMap[dofIndex];
    }
  }


  void ParallelDomainBase::serialize(std::ostream &out, RankToDofContainer &data)
  {
    int mapSize = data.size();
    SerUtil::serialize(out, mapSize);
    for (RankToDofContainer::iterator it = data.begin(); it != data.end(); ++it) {
      int rank = it->first;
      SerUtil::serialize(out, rank);
      serialize(out, it->second);
    }
  }

  
  void ParallelDomainBase::deserialize(std::istream &in, RankToDofContainer &data,
				       std::map<int, const DegreeOfFreedom*> &dofMap)
  {
    int mapSize = 0;
    SerUtil::deserialize(in, mapSize);
    for (int i = 0; i < mapSize; i++) {
      int rank = 0;
      SerUtil::deserialize(in, rank);
      deserialize(in, data[rank], dofMap);      
    }
  }

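  // Sets the weight of every macro (level-0) element to the number of its leaf
  // elements; these weights steer the repartitioning. Returns the sum of the
  // weights within the rank's partition.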
  double ParallelDomainBase::setElemWeights(AdaptInfo *adaptInfo) 
  {
    double localWeightSum = 0.0;
    int elNum = -1;

    elemWeights.clear();

    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, -1,
					 Mesh::CALL_EVERY_EL_PREORDER);
    while (elInfo) {
      Element *element = elInfo->getElement();

      // get partition data
      PartitionElementData *partitionData = dynamic_cast<PartitionElementData*>
	(element->getElementData(PARTITION_ED));

      if (partitionData && partitionData->getPartitionStatus() == IN) {
	if (partitionData->getLevel() == 0)
	  elNum = element->getIndex();

	TEST_EXIT_DBG(elNum != -1)("invalid element number\n");

	if (element->isLeaf()) {
	  elemWeights[elNum] += 1.0;
	  localWeightSum += 1.0;
	}
      }

      elInfo = stack.traverseNext(elInfo);
    }

    return localWeightSum;
  }

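  // Computes a mesh partitioning with ParMETIS: an initial partitioning on the
  // first call, an adaptive repartitioning based on the current element weights
  // on all subsequent calls.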
  void ParallelDomainBase::partitionMesh(AdaptInfo *adaptInfo)
  {
    if (initialPartitionMesh) {
      initialPartitionMesh = false;
      partitioner->fillCoarsePartitionVec(&oldPartitionVec);
      partitioner->partition(&elemWeights, INITIAL);
    } else {
      oldPartitionVec = partitionVec;
      partitioner->partition(&elemWeights, ADAPTIVE_REPART, 100.0 /*0.000001*/);
    }    

    partitioner->fillCoarsePartitionVec(&partitionVec);
  }

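  // Determines all interior boundaries of the rank's partition, i.e., element
  // sides shared with elements owned by another rank. Periodic boundaries are
  // stored in periodicBoundary; all others go to myIntBoundary or
  // otherIntBoundary, depending on which rank is responsible for the boundary.
  // Afterwards, the atomic boundaries are brought into the same order on both
  // ranks that share them.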
  void ParallelDomainBase::createInteriorBoundaryInfo()
  {
    FUNCNAME("ParallelDomainBase::createInteriorBoundaryInfo()");

    int nNeighbours = mesh->getGeo(NEIGH);
    int dim = mesh->getDim();
    GeoIndex subObj = CENTER;
    switch (dim) {
    case 2:
      subObj = EDGE;
      break;
    case 3:
      subObj = FACE;
      break;
    default:
      ERROR_EXIT("Unsupported mesh dimension!\n");
    }     

    // === First, traverse the mesh and search for all elements that have a ===
    // === boundary with an element within another partition.               ===

    TraverseStack stack;
    ElInfo *elInfo = 
      stack.traverseFirst(mesh, -1, 
			  Mesh::CALL_LEAF_EL | Mesh::FILL_NEIGH | Mesh::FILL_BOUND);
    while (elInfo) {
      Element *element = elInfo->getElement();

      PartitionElementData *partitionData = 
	dynamic_cast<PartitionElementData*>(element->getElementData(PARTITION_ED));   

      // Check if the element is within the rank's partition.
      if (partitionData->getPartitionStatus() == IN) {
	for (int i = 0; i < nNeighbours; i++) {
	  if (!elInfo->getNeighbour(i))
	    continue;

	  PartitionElementData *neighbourPartitionData =
	    dynamic_cast<PartitionElementData*>(elInfo->getNeighbour(i)->
						getElementData(PARTITION_ED));

 	  if (neighbourPartitionData->getPartitionStatus() == OUT) {

	    // We have found an element that is in the rank's partition, but has
	    // a neighbour outside of the rank's partition.

	    // === Create information about the boundary between the two elements. ===

	    AtomicBoundary bound;	    	    
	    bound.rankObj.el = element;
	    bound.rankObj.elIndex = element->getIndex();
	    bound.rankObj.elType = elInfo->getType();
	    bound.rankObj.subObj = subObj;
	    bound.rankObj.ithObj = i;
	    // Do not set a pointer to the element, because it will be deleted from
	    // the mesh after partitioning and the pointer would become invalid.
	    bound.neighObj.el = NULL;
	    bound.neighObj.elIndex = elInfo->getNeighbour(i)->getIndex();
	    bound.neighObj.elType = -1;
	    bound.neighObj.subObj = subObj;
	    bound.neighObj.ithObj = elInfo->getSideOfNeighbour(i);
	    
	    if (dim == 2) {
	      // i == 2  =>  getSideOfNeighbour(i) == 2
	      TEST_EXIT_DBG(i != 2 || elInfo->getSideOfNeighbour(i) == 2)
		("Should not happen!\n");
	    }

	    // Get rank number of the neighbouring element.
	    int otherElementRank = partitionVec[elInfo->getNeighbour(i)->getIndex()];

	    // === Add the boundary information object to the corresponding overall ===
	    // === boundary. There are three possibilities: if the boundary is a    ===
	    // === periodic boundary, just add it to \ref periodicBoundary. Here it ===
	    // === does not matter which rank is responsible for this boundary.     ===
	    // === Otherwise, the boundary is added either to \ref myIntBoundary or ===
	    // === to \ref otherIntBoundary, depending on which rank is responsible ===
	    // === for this boundary.                                               ===

	    if (BoundaryManager::isBoundaryPeriodic(elInfo->getBoundary(subObj, i))) {
	      // We have found an element that is at an interior, periodic boundary.
	      AtomicBoundary& b = periodicBoundary.getNewAtomic(otherElementRank);
	      b = bound;
	    } else {
	      // We have found an element that is at an interior, non-periodic boundary.
	      AtomicBoundary& b = ((mpiRank > otherElementRank) ?
				   myIntBoundary.getNewAtomic(otherElementRank) :
				   otherIntBoundary.getNewAtomic(otherElementRank));
	      b = bound;	      
	    }
 	  }
	}
      }

      elInfo = stack.traverseNext(elInfo);
    }

    // === Once we have this information, we must take care of the order of the ===
    // === atomic bounds in the three boundary handling objects. In the end, all ===
    // === boundaries must be in the same order on both ranks that share them.   ===

    std::vector<int*> sendBuffers, recvBuffers;

    // First we communicate myIntBoundary/otherIntBoundary, and then the
    // periodic boundaries.
    MPI::Request request[max(myIntBoundary.boundary.size() + 
			     otherIntBoundary.boundary.size(),
			     periodicBoundary.boundary.size())];
    int requestCounter = 0;

    // === The owners of the boundaries send their atomic boundary order to the ===
    // === neighbouring ranks.                                                  ===

    for (RankToBoundMap::iterator rankIt = myIntBoundary.boundary.begin();
	 rankIt != myIntBoundary.boundary.end(); ++rankIt) {    
      int nSendInt = rankIt->second.size();
      int* buffer = new int[nSendInt * 2];
      for (int i = 0; i < nSendInt; i++) {
	buffer[i * 2] = (rankIt->second)[i].rankObj.elIndex;
	buffer[i * 2 + 1] = (rankIt->second)[i].rankObj.ithObj;
      }
      sendBuffers.push_back(buffer);

      request[requestCounter++] =
	mpiComm.Isend(buffer, nSendInt * 2, MPI_INT, rankIt->first, 0);
    }

    for (RankToBoundMap::iterator rankIt = otherIntBoundary.boundary.begin();
	 rankIt != otherIntBoundary.boundary.end(); ++rankIt) {
      int nRecvInt = rankIt->second.size();
      int *buffer = new int[nRecvInt * 2];
      recvBuffers.push_back(buffer);

      request[requestCounter++] = 
	mpiComm.Irecv(buffer, nRecvInt * 2, MPI_INT, rankIt->first, 0);
    }

    MPI::Request::Waitall(requestCounter, request);

    // === The information about all neighbouring boundaries has been received. ===
    // === Now the rank tests whether its own atomic boundaries are in the same ===
    // === order; if not, the atomic boundaries are swapped into the correct    ===
    // === order.                                                               ===

    int i = 0;
    for (RankToBoundMap::iterator rankIt = otherIntBoundary.boundary.begin();
	 rankIt != otherIntBoundary.boundary.end(); ++rankIt) {

      // === We have received from rank "rankIt->first" the ordered list of element ===
      // === indices. We now have to sort the corresponding list in this rank to    ===
      // === get the same order.                                                    ===

      for (int j = 0; j < static_cast<int>(rankIt->second.size()); j++) {
	// If the expected object is not in place, search for it.
	if ((rankIt->second)[j].neighObj.elIndex != recvBuffers[i][j * 2] ||
	    (rankIt->second)[j].neighObj.ithObj != recvBuffers[i][j * 2 + 1]) {
	  int k = j + 1;
	  for (; k < static_cast<int>(rankIt->second.size()); k++)
	    if ((rankIt->second)[k].neighObj.elIndex == recvBuffers[i][j * 2] && 
		(rankIt->second)[k].neighObj.ithObj == recvBuffers[i][j * 2 + 1])
	      break;

	  // The element must always be found, because the list is just in another order.
	  TEST_EXIT_DBG(k < static_cast<int>(rankIt->second.size()))
	    ("Should never happen!\n");

	  // Swap the current with the found element.
	  AtomicBoundary tmpBound = (rankIt->second)[k];
	  (rankIt->second)[k] = (rankIt->second)[j];
	  (rankIt->second)[j] = tmpBound;
	}
      }

      delete [] recvBuffers[i];
      i++;
    }

    for (int i = 0; i < static_cast<int>(sendBuffers.size()); i++)
      delete [] sendBuffers[i];

    recvBuffers.clear();
    sendBuffers.clear();
 

    // === Do the same for the periodic boundaries. ===

    requestCounter = 0;

    for (RankToBoundMap::iterator rankIt = periodicBoundary.boundary.begin();
	 rankIt != periodicBoundary.boundary.end(); ++rankIt) {

      TEST_EXIT_DBG(rankIt->first != mpiRank)
	("It is not possible to have an interior boundary within a rank partition!\n");

      if (rankIt->first < mpiRank) {
	int nSendInt = rankIt->second.size();
	int* buffer = new int[nSendInt * 2];
	for (int i = 0; i < nSendInt; i++) {