ParallelDomainBase.cc

#include <algorithm>

#include "ParallelDomainBase.h"
#include "ParMetisPartitioner.h"
#include "Mesh.h"
#include "Traverse.h"
#include "ElInfo.h"
#include "Element.h"
#include "MacroElement.h"
#include "PartitionElementData.h"
#include "DOFMatrix.h"
#include "DOFVector.h"
#include "SystemVector.h"
#include "VtkWriter.h"
#include "ElementDofIterator.h"
#include "ProblemStatBase.h"
#include "StandardProblemIteration.h"
#include "ElementFileWriter.h"
#include "VertexVector.h"

#include "petscksp.h"

namespace AMDiS {

  PetscErrorCode myKSPMonitor(KSP ksp, PetscInt iter, PetscReal rnorm, void *)
  {    
    if (iter % 100 == 0 && MPI::COMM_WORLD.Get_rank() == 0)
      std::cout << "  Petsc-Iteration " << iter << ": " << rnorm << std::endl;

    return 0;
  }

  inline bool cmpDofsByValue(const DegreeOfFreedom* dof1, const DegreeOfFreedom* dof2)
  {
    return (*dof1 < *dof2);
  }

  ParallelDomainBase::ParallelDomainBase(ProblemIterationInterface *iIF,
					 ProblemTimeInterface *tIF,
					 FiniteElemSpace *fe,
					 RefinementManager *refineManager)
    : iterationIF(iIF),
      timeIF(tIF),
      name(iIF->getName()),
      feSpace(fe),
      mesh(fe->getMesh()),
      refinementManager(refineManager),
      initialPartitionMesh(true),
      nRankDofs(0),
      rstart(0),
      nComponents(1),
      deserialized(false)
  {
    FUNCNAME("ParallelDomainBase::ParallelDomainBase()");

    TEST_EXIT(mesh->getNumberOfDOFAdmin() == 1)
      ("Only meshes with one DOFAdmin are supported!\n");
    TEST_EXIT(mesh->getDOFAdmin(0).getNumberOfPreDOFs(0) == 0)
      ("Wrong pre dof number for DOFAdmin!\n");

    mpiRank = MPI::COMM_WORLD.Get_rank();
    mpiSize = MPI::COMM_WORLD.Get_size();
    mpiComm = MPI::COMM_WORLD;
    partitioner = new ParMetisPartitioner(mesh, &mpiComm);
  }

  void ParallelDomainBase::initParallelization(AdaptInfo *adaptInfo)
  {
    FUNCNAME("ParallelDomainBase::initParallelization()");

    TEST_EXIT(mpiSize > 1)
      ("Parallelization does not work with only one process!\n");

    // If the problem has already been read from a file, we do not need to do anything.
    if (deserialized)
      return;

    // Test whether the mesh consists of macro elements only. Partitioning of the mesh
    // is supported only for macro meshes, so it will not work if the mesh has already
    // been refined in some way.
    testForMacroMesh();

    // Create an initial partitioning of the mesh,
    partitioner->createPartitionData();
    // set the element weights, which are all 1 at the very beginning,
    setElemWeights(adaptInfo);
    // and now partition the mesh.
    partitionMesh(adaptInfo);

#if (DEBUG != 0)
    ElementIdxToDofs elMap;
    DbgCreateElementMap(elMap);

    if (mpiRank == 0)
      writePartitioningMesh("part.vtu");
#endif

    // === Create new global and local DOF numbering. ===

    // Set of all DOFs of the rank.
    std::vector<const DegreeOfFreedom*> rankDofs;
    // Number of DOFs in the rank's partition that are owned by the rank.
    nRankDofs = 0;
    // Number of all DOFs in the macro mesh.
    int nOverallDOFs = 0;

    createLocalGlobalNumbering(rankDofs, nRankDofs, nOverallDOFs);

    // === Create interior boundary information. ===

    createInteriorBoundaryInfo();

    // === Remove all macro elements that are not part of the rank partition. ===

    removeMacroElements();

#if (DEBUG != 0)
    DbgTestElementMap(elMap);
    DbgTestInteriorBoundary();
#endif

    // === Reset all DOFAdmins of the mesh. ===

    updateDofAdmins();

    // === Create periodic dof mapping, if there are periodic boundaries. ===

    createPeriodicMap();


    // === Global refinements. ===

    int globalRefinement = 0;
    GET_PARAMETER(0, "testgr", "%d", &globalRefinement);

    if (globalRefinement > 0) {
      refinementManager->globalRefine(mesh, globalRefinement);
#if (DEBUG != 0)
      elMap.clear();
      DbgCreateElementMap(elMap);
#endif

      updateLocalGlobalNumbering(nRankDofs, nOverallDOFs);

      updateDofAdmins();

#if (DEBUG != 0)
      DbgTestElementMap(elMap);
#endif

      // === Update periodic mapping, if there are periodic boundaries. ===
      createPeriodicMap();
    }


#if (DEBUG != 0)
    DbgTestCommonDofs(true);
#endif

    nRankRows = nRankDofs * nComponents;
    nOverallRows = nOverallDOFs * nComponents;
  }


  void ParallelDomainBase::exitParallelization(AdaptInfo *adaptInfo)
  {}
  
  void ParallelDomainBase::updateDofAdmins()
  {
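    // After the parallel renumbering, exactly the first mapLocalGlobalDOFs.size()
    // local dofs are in use. Bring every DOFAdmin of the mesh into this state:
    // mark these dofs as used and all remaining slots as free.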
    int nAdmins = mesh->getNumberOfDOFAdmin();
    for (int i = 0; i < nAdmins; i++) {
      DOFAdmin& admin = const_cast<DOFAdmin&>(mesh->getDOFAdmin(i));
      
      for (int j = 0; j < admin.getSize(); j++)
	admin.setDOFFree(j, true);
      for (int j = 0; j < static_cast<int>(mapLocalGlobalDOFs.size()); j++)
 	admin.setDOFFree(j, false);

      admin.setUsedSize(mapLocalGlobalDOFs.size());
      admin.setUsedCount(mapLocalGlobalDOFs.size());
      admin.setFirstHole(mapLocalGlobalDOFs.size());
    }
  }

  void ParallelDomainBase::testForMacroMesh()
  {
    FUNCNAME("ParallelDomainBase::testForMacroMesh()");

    int nMacroElements = 0;

    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, -1, Mesh::CALL_LEAF_EL);
    while (elInfo) {
      TEST_EXIT(elInfo->getLevel() == 0)
	("Mesh is already refined! This does not work with parallelization!\n");
      
      nMacroElements++;

      elInfo = stack.traverseNext(elInfo);
    }

    TEST_EXIT(nMacroElements >= mpiSize)
      ("The mesh has fewer macro elements than the number of MPI processes!\n");
  }


  void ParallelDomainBase::setDofMatrix(DOFMatrix* mat, int dispMult, 
					int dispAddRow, int dispAddCol)
  {
    FUNCNAME("ParallelDomainBase::setDofMatrix()");

    TEST_EXIT(mat)("No DOFMatrix!\n");

    using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits= mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    traits::col<Matrix>::type col(mat->getBaseMatrix());
    traits::const_value<Matrix>::type value(mat->getBaseMatrix());

    typedef traits::range_generator<row, Matrix>::type cursor_type;
    typedef traits::range_generator<nz, cursor_type>::type icursor_type;

    std::vector<int> cols;
    std::vector<double> values;
    cols.reserve(300);
    values.reserve(300);

    // === Traverse all rows of the dof matrix and insert the values row-wise ===
    // === into the petsc matrix.                                             ===

    for (cursor_type cursor = begin<row>(mat->getBaseMatrix()), 
	   cend = end<row>(mat->getBaseMatrix()); cursor != cend; ++cursor) {

      cols.clear();
      values.clear();

      // Global index of the current row dof.
      int globalRowDof = mapLocalGlobalDOFs[*cursor];
      // Test if the current row dof is a periodic dof.
      bool periodicRow = (periodicDof.count(globalRowDof) > 0);


      // === Traverse all non-zero entries of the row and produce vector cols ===
      // === with the column indices of all row entries and vector values     ===
      // === with the corresponding values.                                   ===

      for (icursor_type icursor = begin<nz>(cursor), icend = end<nz>(cursor); 
	   icursor != icend; ++icursor) {

	// Set only non-zero values.
	if (value(*icursor) != 0.0) {
	  // Global index of the current column index.
	  int globalColDof = mapLocalGlobalDOFs[col(*icursor)];
	  // Calculate the exact position of the column index in the petsc matrix.
	  int colIndex = globalColDof * dispMult + dispAddCol;

	  // If the current row is not periodic, but the current dof index is periodic,
	  // we have to duplicate the value to the other corresponding periodic columns.
 	  if (periodicRow == false && periodicDof.count(globalColDof) > 0) {
	    // The value is assigned to n matrix entries; therefore, every entry
	    // gets only 1/n of the original value.
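	    // (Example: a dof with two periodic partners produces three column
	    // entries, each carrying 1/3 of the value, so the assembled sum over
	    // all periodic copies equals the original contribution.)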
	    double scalFactor = 1.0 / (periodicDof[globalColDof].size() + 1.0);

	    // Insert original entry.
	    cols.push_back(colIndex);
	    values.push_back(value(*icursor) * scalFactor);

	    // Insert the periodic entries.
 	    for (std::set<DegreeOfFreedom>::iterator it = 
		   periodicDof[globalColDof].begin();
		 it != periodicDof[globalColDof].end(); ++it) {
	      cols.push_back(*it * dispMult + dispAddCol);
 	      values.push_back(value(*icursor) * scalFactor);
	    }
	  } else {
	    // Neither the row nor the column dof index is periodic, so simply add the entry.
	    cols.push_back(colIndex);
	    values.push_back(value(*icursor));
	  }
	}
      }


      // === Up to now we have assembled one row. Now, the row must be sent to the ===
      // === corresponding rows of the petsc matrix.                               ===

      // Calculate petsc row index.
      int rowIndex = globalRowDof * dispMult + dispAddRow;

      if (periodicRow) {
	// The row dof is periodic, so send the row to all corresponding periodic rows.

	double scalFactor = 1.0 / (periodicDof[globalRowDof].size() + 1.0);

	int diagIndex = -1;
	for (int i = 0; i < static_cast<int>(values.size()); i++) {
	  // Change only the non-diagonal values in the column. For the diagonal test
	  // we compare the global dof indices of the dof matrix (not of the petsc
	  // matrix!).
	  if ((cols[i] - dispAddCol) / dispMult != globalRowDof)
	    values[i] *= scalFactor;
	  else
	    diagIndex = i;
	}

	// Send the main row to the petsc matrix.
	MatSetValues(petscMatrix, 1, &rowIndex, cols.size(), 
		     &(cols[0]), &(values[0]), ADD_VALUES);	

	// Set diagonal element to zero, i.e., the diagonal element of the current
	// row is not sent to the periodic row indices.
	if (diagIndex != -1)
	  values[diagIndex] = 0.0;

	// Send the row to all periodic row indices.
	for (std::set<DegreeOfFreedom>::iterator it = periodicDof[globalRowDof].begin();
	     it != periodicDof[globalRowDof].end(); ++it) {
	  int perRowIndex = *it * dispMult + dispAddRow;
	  MatSetValues(petscMatrix, 1, &perRowIndex, cols.size(), 
		       &(cols[0]), &(values[0]), ADD_VALUES);
	}

      } else {
	// The row dof is not periodic, simply send the row to the petsc matrix.
	MatSetValues(petscMatrix, 1, &rowIndex, cols.size(), 
		     &(cols[0]), &(values[0]), ADD_VALUES);
      }    
    }
  }


  void ParallelDomainBase::setDofVector(Vec& petscVec, DOFVector<double>* vec, 
					int dispMult, int dispAdd)
  {
    // Traverse all used dofs in the dof vector.
    DOFVector<double>::Iterator dofIt(vec, USED_DOFS);
    for (dofIt.reset(); !dofIt.end(); ++dofIt) {
      // Calculate global row index of the dof.
      int globalRow = mapLocalGlobalDOFs[dofIt.getDOFIndex()];
      // Calculate petsc index of the row dof.
      int index = globalRow * dispMult + dispAdd;

      if (periodicDof.count(globalRow) > 0) {
	// The dof index is periodic, so divide the value among all corresponding dof entries.
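	// As in setDofMatrix(), splitting by 1 / (number of periodic partners + 1)
	// keeps the assembled sum over all periodic copies equal to the original value.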

	double value = *dofIt / (periodicDof[globalRow].size() + 1.0);
	VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);

	for (std::set<DegreeOfFreedom>::iterator it = periodicDof[globalRow].begin();
	     it != periodicDof[globalRow].end(); ++it) {
	  index = *it * dispMult + dispAdd;
	  VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);
	}

      } else {
	// The dof index is not periodic.
	double value = *dofIt;
	VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);
      }
    }    
  }


  void ParallelDomainBase::fillPetscMatrix(DOFMatrix *mat, DOFVector<double> *vec)
  {
    FUNCNAME("ParallelDomainBase::fillPetscMatrix()");

    ERROR_EXIT("Not yet tested for scalar problem definition!\n");

    MatCreate(PETSC_COMM_WORLD, &petscMatrix);
    MatSetSizes(petscMatrix, nRankRows, nRankRows, nOverallRows, nOverallRows);
    MatSetType(petscMatrix, MATAIJ);

    VecCreate(PETSC_COMM_WORLD, &petscRhsVec);
    VecSetSizes(petscRhsVec, nRankRows, nOverallRows);
    VecSetType(petscRhsVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscSolVec);
    VecSetSizes(petscSolVec, nRankRows, nOverallRows);
    VecSetType(petscSolVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscTmpVec);
    VecSetSizes(petscTmpVec, nRankRows, nOverallRows);
    VecSetType(petscTmpVec, VECMPI);

    setDofMatrix(mat);

    MatAssemblyBegin(petscMatrix, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(petscMatrix, MAT_FINAL_ASSEMBLY);

    setDofVector(petscRhsVec, vec);

    VecAssemblyBegin(petscRhsVec);
    VecAssemblyEnd(petscRhsVec);
  }

  
  void ParallelDomainBase::fillPetscMatrix(Matrix<DOFMatrix*> *mat, SystemVector *vec)
  {
    FUNCNAME("ParallelDomainBase::fillPetscMatrix()");

    clock_t first = clock();

    VecCreate(PETSC_COMM_WORLD, &petscRhsVec);
    VecSetSizes(petscRhsVec, nRankRows, nOverallRows);
    VecSetType(petscRhsVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscSolVec);
    VecSetSizes(petscSolVec, nRankRows, nOverallRows);
    VecSetType(petscSolVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscTmpVec);
    VecSetSizes(petscTmpVec, nRankRows, nOverallRows);
    VecSetType(petscTmpVec, VECMPI);

    using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits= mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

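    // Per-row preallocation counters for the MatCreateMPIAIJ call below: d_nnz[r]
    // counts the non-zeros of local row r in the diagonal block (columns owned by
    // this rank), o_nnz[r] those in the off-diagonal block (columns owned by other
    // ranks).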
    int d_nnz[nRankRows];
    int o_nnz[nRankRows];

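    // Matrix contributions for rows that this rank assembles but does not own are
    // collected per destination rank as (row, column) index pairs and communicated
    // below, so that the owning ranks can complete their non-zero counts.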
    std::map<int, std::vector< std::pair<int, int> > > sendMatrixEntry;

    for (int i = 0; i < nRankRows; i++) {
      d_nnz[i] = 0;
      o_nnz[i] = 0;
    }

    for (int i = 0; i < nComponents; i++) {
      for (int j = 0; j < nComponents; j++) {
 	if ((*mat)[i][j]) {
	  Matrix bmat = (*mat)[i][j]->getBaseMatrix();

	  traits::col<Matrix>::type col(bmat);
	  traits::const_value<Matrix>::type value(bmat);
	  
	  typedef traits::range_generator<row, Matrix>::type cursor_type;
	  typedef traits::range_generator<nz, cursor_type>::type icursor_type;

	  for (cursor_type cursor = begin<row>(bmat), 
		 cend = end<row>(bmat); cursor != cend; ++cursor) {

	    int r = mapLocalGlobalDOFs[*cursor] * nComponents + i;

	    if (isRankDof[*cursor]) {
	      r -= rstart * nComponents;

#if (DEBUG != 0)    
	      if (r < 0 || r >= nRankRows) {
		std::cout << "ERROR in rank: " << mpiRank << std::endl;
		std::cout << "  Wrong r = " << r << " " << *cursor << " " 
			  << mapLocalGlobalDOFs[*cursor] << " " 
			  << nRankRows << std::endl;
		ERROR_EXIT("Should not happen!\n");
	      }
#endif
	      for (icursor_type icursor = begin<nz>(cursor), 
		     icend = end<nz>(cursor); icursor != icend; ++icursor) {
		if (value(*icursor) != 0.0) {
		  int c = mapLocalGlobalDOFs[col(*icursor)] * nComponents + j;
		  if (c >= rstart * nComponents && 
		      c < rstart * nComponents + nRankRows)
		    d_nnz[r]++;
		  else
		    o_nnz[r]++;		  
		}    
	      }
	    } else {
	      int sendToRank = -1;

	      for (RankToDofContainer::iterator it = recvDofs.begin();
		   it != recvDofs.end(); ++it) {
		for (DofContainer::iterator dofIt = it->second.begin();
		     dofIt != it->second.end(); ++dofIt) {
		  if (**dofIt == *cursor) {
		    sendToRank = it->first;
		    break;
		  }
		}

		if (sendToRank != -1)
		  break;
	      }

	      TEST_EXIT_DBG(sendToRank != -1)("Should not happen!\n");

	      for (icursor_type icursor = begin<nz>(cursor), 
		     icend = end<nz>(cursor); icursor != icend; ++icursor) {
		if (value(*icursor) != 0.0) {
		  int c = mapLocalGlobalDOFs[col(*icursor)] * nComponents + j;
		  
		  sendMatrixEntry[sendToRank].push_back(std::make_pair(r, c));
		}
	      }
	    }
	  }
	}
      }
    }

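    // === Exchange the non-zero pattern of rows that were assembled on a rank ===
    // === that does not own them: first the number of entries per neighbour,  ===
    // === then the (row, column) pairs themselves.                            ===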
    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;

    std::vector<int*> sendBuffers;
    sendBuffers.reserve(recvDofs.size());

    for (RankToDofContainer::iterator it = recvDofs.begin(); 
	 it != recvDofs.end(); ++it) {
      int nSend = sendMatrixEntry[it->first].size();
      request[requestCounter++] = mpiComm.Isend(&nSend, 1, MPI_INT, it->first, 0);
      
      if (nSend > 0) {
	int *sendbuf = new int[nSend * 2];
	for (int i = 0; i < nSend; i++) {
	  sendbuf[i * 2] = sendMatrixEntry[it->first][i].first;
	  sendbuf[i * 2 + 1] = sendMatrixEntry[it->first][i].second;
	}
	sendBuffers.push_back(sendbuf);
      } else {
	sendBuffers.push_back(NULL);
      }
    }

    std::vector<int> recvSize;
    recvSize.reserve(sendDofs.size());
    
    int i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it)
      request[requestCounter++] = 
	mpiComm.Irecv(&(recvSize[i++]), 1, MPI_INT, it->first, 0);

    MPI::Request::Waitall(requestCounter, request);

    requestCounter = 0;

    i = 0;
    for (RankToDofContainer::iterator it = recvDofs.begin(); 
	 it != recvDofs.end(); ++it) {
      int nSend = sendMatrixEntry[it->first].size();

      if (nSend > 0)
	request[requestCounter++] = 
	  mpiComm.Isend(sendBuffers[i], nSend * 2, MPI_INT, it->first, 0);

      i++;
    }

    std::vector<int*> recvBuffers;
    recvBuffers.reserve(sendDofs.size());
    
    i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it) {
      if (recvSize[i] > 0) {
	int *recvbuf = new int[recvSize[i] * 2];
	recvBuffers.push_back(recvbuf);

	request[requestCounter++] =
	  mpiComm.Irecv(recvbuf, recvSize[i] * 2, MPI_INT, it->first, 0);
      } else {
	recvBuffers.push_back(NULL);
      }

      i++;
    }

    MPI::Request::Waitall(requestCounter, request);
    for (int j = 0; j < static_cast<int>(sendBuffers.size()); j++)
      if (sendBuffers[j])
	delete [] sendBuffers[j];

    i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it) {
      if (recvSize[i] > 0) {
	for (int j = 0; j < recvSize[i]; j++) {
	  int r = recvBuffers[i][j * 2];
	  int c = recvBuffers[i][j * 2 + 1];

	  r -= rstart * nComponents;

	  TEST_EXIT_DBG(r >= 0 && r < nRankRows)("Should not happen!\n");
	  
	  if (c < rstart * nComponents || c >= rstart * nComponents + nRankRows)
	    o_nnz[r]++;
	  else
	    d_nnz[r]++;
	}

	delete [] recvBuffers[i];
      }

      i++;
    }

    MatCreateMPIAIJ(PETSC_COMM_WORLD, nRankRows, nRankRows, nOverallRows, nOverallRows,
		    0, d_nnz, 0, o_nnz, &petscMatrix);

#if (DEBUG != 0)
    int a, b;
    MatGetOwnershipRange(petscMatrix, &a, &b);
    TEST_EXIT(a == rstart * nComponents)("Wrong matrix ownership range!\n");
    TEST_EXIT(b == rstart * nComponents + nRankRows)("Wrong matrix ownership range!\n");
#endif
    using mtl::tag::major; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits= mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    for (int i = 0; i < nComponents; i++)
      for (int j = 0; j < nComponents; j++)
	if ((*mat)[i][j])
	  setDofMatrix((*mat)[i][j], nComponents, i, j);

    MatAssemblyBegin(petscMatrix, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(petscMatrix, MAT_FINAL_ASSEMBLY);

    for (int i = 0; i < nComponents; i++)
      setDofVector(petscRhsVec, vec->getDOFVector(i), nComponents, i);

    VecAssemblyBegin(petscRhsVec);
    VecAssemblyEnd(petscRhsVec);

    INFO(info, 8)("Fill petsc matrix needed %.5f seconds\n", TIME_USED(first, clock()));
  }


  void ParallelDomainBase::solvePetscMatrix(DOFVector<double> *vec)
  {
    FUNCNAME("ParallelDomainBase::solvePetscMatrix()");

    ERROR_EXIT("Not yet tested for scalar problem definition!\n");

    KSP ksp;
    KSPCreate(PETSC_COMM_WORLD, &ksp);
    KSPSetOperators(ksp, petscMatrix, petscMatrix, SAME_NONZERO_PATTERN);
    KSPSetTolerances(ksp, 1.e-7, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT);
    KSPSetType(ksp, KSPBCGS);
    KSPMonitorSet(ksp, myKSPMonitor, PETSC_NULL, 0);
    KSPSolve(ksp, petscRhsVec, petscSolVec);

    PetscScalar *vecPointer;
    VecGetArray(petscSolVec, &vecPointer);

    for (int i = 0; i < nRankDofs; i++)
      (*vec)[mapLocalToDofIndex[i]] = vecPointer[i];

    VecRestoreArray(petscSolVec, &vecPointer);

    std::vector<double*> sendBuffers(sendDofs.size());
    std::vector<double*> recvBuffers(recvDofs.size());

    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;
    
    int i = 0;
    for (RankToDofContainer::iterator sendIt = sendDofs.begin();
	 sendIt != sendDofs.end(); ++sendIt, i++) {
      int nSendDOFs = sendIt->second.size();
      sendBuffers[i] = new double[nSendDOFs];

      for (int j = 0; j < nSendDOFs; j++)
	sendBuffers[i][j] = (*vec)[*((sendIt->second)[j])];

      request[requestCounter++] =
	mpiComm.Isend(sendBuffers[i], nSendDOFs, MPI_DOUBLE, sendIt->first, 0);
    }

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size();
      recvBuffers[i] = new double[nRecvDOFs];

      request[requestCounter++] =
	mpiComm.Irecv(recvBuffers[i], nRecvDOFs, MPI_DOUBLE, recvIt->first, 0);
    }


    MPI::Request::Waitall(requestCounter, request);

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      for (int j = 0; j < static_cast<int>(recvIt->second.size()); j++)
	(*vec)[*(recvIt->second)[j]] = recvBuffers[i][j];

      delete [] recvBuffers[i];
    }
    
    for (int i = 0; i < static_cast<int>(sendBuffers.size()); i++)
      delete [] sendBuffers[i];

    MatDestroy(petscMatrix);
    VecDestroy(petscRhsVec);
    VecDestroy(petscSolVec);
    VecDestroy(petscTmpVec);
  }


  void ParallelDomainBase::solvePetscMatrix(SystemVector &vec)
  {
    FUNCNAME("ParallelDomainBase::solvePetscMatrix()");

#if 0
    // Set the old solution to be the initial guess for the petsc solver.
    for (int i = 0; i < nComponents; i++)
      setDofVector(petscSolVec, vec->getDOFVector(i), nComponents, i);

    VecAssemblyBegin(petscSolVec);
    VecAssemblyEnd(petscSolVec);
#endif

    // === Init Petsc solver. ===

    KSP solver;
    KSPCreate(PETSC_COMM_WORLD, &solver);
    KSPSetOperators(solver, petscMatrix, petscMatrix, SAME_NONZERO_PATTERN); 
    KSPSetTolerances(solver, 0.0, 1e-8, PETSC_DEFAULT, PETSC_DEFAULT);
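    // The arguments above set the relative tolerance to 0 and the absolute tolerance
    // to 1e-8, i.e. the solver iterates until the absolute residual norm drops below 1e-8.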
    KSPSetType(solver, KSPBCGS);
    KSPMonitorSet(solver, myKSPMonitor, PETSC_NULL, 0);
    KSPSetFromOptions(solver);
    // Do not delete the solution vector, use it for the initial guess.
    //    KSPSetInitialGuessNonzero(solver, PETSC_TRUE);


    // === Run Petsc. ===

    KSPSolve(solver, petscRhsVec, petscSolVec);

    // === Transfer values from Petsc's solution vector to the dof vectors. ===
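    // The petsc vectors store the unknowns interleaved by component: the value of
    // component i at local dof j sits at position j * nComponents + i of the local array.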
    PetscScalar *vecPointer;
    VecGetArray(petscSolVec, &vecPointer);

    for (int i = 0; i < nComponents; i++) {
      DOFVector<double> *dofvec = vec.getDOFVector(i);
      for (int j = 0; j < nRankDofs; j++)
	(*dofvec)[mapLocalToDofIndex[j]] = vecPointer[j * nComponents + i];
    }

    VecRestoreArray(petscSolVec, &vecPointer);


    // === Synchronize values at common dofs, i.e., dofs that belong to more ===
    // === than one partition.                                               ===
    synchVectors(vec);


    // === Print information about solution process. ===

    int iterations = 0;
    KSPGetIterationNumber(solver, &iterations);
    MSG("  Number of iterations: %d\n", iterations);
    
    double norm = 0.0;
    MatMult(petscMatrix, petscSolVec, petscTmpVec);
    VecAXPY(petscTmpVec, -1.0, petscRhsVec);
    VecNorm(petscTmpVec, NORM_2, &norm);
    MSG("  Residual norm: %e\n", norm);


    // === Destroy Petsc's variables. ===

    MatDestroy(petscMatrix);
    VecDestroy(petscRhsVec);
    VecDestroy(petscSolVec);
    VecDestroy(petscTmpVec);
    KSPDestroy(solver);
  }
  
  void ParallelDomainBase::synchVectors(SystemVector &vec)
  {
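    // Each rank sends the values of the interior boundary dofs it is responsible for
    // (sendDofs) to the neighbouring ranks and overwrites its copies of dofs owned by
    // other ranks (recvDofs) with the received values, for all vector components.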
    std::vector<double*> sendBuffers(sendDofs.size());
    std::vector<double*> recvBuffers(recvDofs.size());

    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;
    
    int i = 0;
    for (RankToDofContainer::iterator sendIt = sendDofs.begin();
	 sendIt != sendDofs.end(); ++sendIt, i++) {
      int nSendDOFs = sendIt->second.size();
      sendBuffers[i] = new double[nSendDOFs * nComponents];

      int counter = 0;
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
	for (int k = 0; k < nSendDOFs; k++)
	  sendBuffers[i][counter++] = (*dofvec)[*((sendIt->second)[k])];
      }

      request[requestCounter++] = mpiComm.Isend(sendBuffers[i], nSendDOFs * nComponents,
						MPI_DOUBLE, sendIt->first, 0);
    }

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size() * nComponents;
      recvBuffers[i] = new double[nRecvDOFs];

      request[requestCounter++] =
	mpiComm.Irecv(recvBuffers[i], nRecvDOFs, MPI_DOUBLE, recvIt->first, 0);
    }


    MPI::Request::Waitall(requestCounter, request);

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size();

      int counter = 0;
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
	for (int k = 0; k < nRecvDOFs; k++)
	  (*dofvec)[*(recvIt->second)[k]] = recvBuffers[i][counter++];
      }

      delete [] recvBuffers[i];
    }
    
    for (int i = 0; i < static_cast<int>(sendBuffers.size()); i++)
      delete [] sendBuffers[i];
  }


  void ParallelDomainBase::serialize(std::ostream &out, DofContainer &data)
  {
    int vecSize = data.size();
    SerUtil::serialize(out, vecSize);
    for (int i = 0; i < vecSize; i++) {
      int dofIndex = (*data[i]);
      SerUtil::serialize(out, dofIndex);
    }
  }


  void ParallelDomainBase::deserialize(std::istream &in, DofContainer &data,
				       std::map<int, const DegreeOfFreedom*> &dofMap)
  {
    FUNCNAME("ParallelDomainBase::deserialize()");

    int vecSize = 0;
    SerUtil::deserialize(in, vecSize);
    data.resize(vecSize);
    for (int i = 0; i < vecSize; i++) {
      int dofIndex = 0;
      SerUtil::deserialize(in, dofIndex);

      TEST_EXIT_DBG(dofMap.count(dofIndex) != 0)
	("Dof index could not be deserialized correctly!\n");

      data[i] = dofMap[dofIndex];
    }
  }


  void ParallelDomainBase::serialize(std::ostream &out, RankToDofContainer &data)
  {
    int mapSize = data.size();
    SerUtil::serialize(out, mapSize);
    for (RankToDofContainer::iterator it = data.begin(); it != data.end(); ++it) {
      int rank = it->first;
      SerUtil::serialize(out, rank);
      serialize(out, it->second);
    }
  }

  
  void ParallelDomainBase::deserialize(std::istream &in, RankToDofContainer &data,
				       std::map<int, const DegreeOfFreedom*> &dofMap)
  {
    int mapSize = 0;
    SerUtil::deserialize(in, mapSize);
    for (int i = 0; i < mapSize; i++) {
      int rank = 0;
      SerUtil::deserialize(in, rank);
      deserialize(in, data[rank], dofMap);      
    }
  }


  double ParallelDomainBase::setElemWeights(AdaptInfo *adaptInfo) 
  {
    double localWeightSum = 0.0;
    int elNum = -1;

    elemWeights.clear();

    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, -1,
					 Mesh::CALL_EVERY_EL_PREORDER);
    while (elInfo) {
      Element *element = elInfo->getElement();

      // get partition data
      PartitionElementData *partitionData = dynamic_cast<PartitionElementData*>
	(element->getElementData(PARTITION_ED));

      if (partitionData && partitionData->getPartitionStatus() == IN) {
	if (partitionData->getLevel() == 0)
	  elNum = element->getIndex();

	TEST_EXIT_DBG(elNum != -1)("invalid element number\n");
	if (element->isLeaf()) {
	  elemWeights[elNum] += 1.0;
	  localWeightSum += 1.0;
	}
      }

      elInfo = stack.traverseNext(elInfo);
    }

    return localWeightSum;
  }


  void ParallelDomainBase::partitionMesh(AdaptInfo *adaptInfo)