From 53c5f965810f54fb7ecd4431dd3acca1a2fb2b02 Mon Sep 17 00:00:00 2001
From: Thomas Witkowski <thomas.witkowski@gmx.de>
Date: Tue, 26 Jun 2012 11:06:12 +0000
Subject: [PATCH] Some work on using the nnz structure of the matrix in the
 FETI-DP method.

---
 AMDiS/src/parallel/MeshDistributor.cc         |  15 +-
 AMDiS/src/parallel/MeshDistributor.h          |   5 +
 AMDiS/src/parallel/ParMetisPartitioner.h      |   2 +-
 AMDiS/src/parallel/ParallelDofMapping.h       |  19 +
 AMDiS/src/parallel/PetscSolverFeti.cc         |  33 +-
 AMDiS/src/parallel/PetscSolverGlobalMatrix.cc | 387 +++++++++++++++---
 AMDiS/src/parallel/PetscSolverGlobalMatrix.h  |  21 +-
 7 files changed, 415 insertions(+), 67 deletions(-)
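
The common thread of this patch is to preallocate the PETSc matrices from per-row nonzero counts for the diagonal and off-diagonal blocks instead of a single fixed per-row estimate. A minimal sketch of that preallocation pattern, with illustrative names that are not taken from this patch:

    #include <petscmat.h>

    // Sketch: create a distributed AIJ matrix preallocated from per-row nnz
    // counts for the diagonal and the off-diagonal block, the pattern this
    // patch applies to the interior and coarse space matrices. All names are
    // illustrative; diagNnz/offdiagNnz hold one entry per locally owned row.
    Mat createPreallocatedMat(MPI_Comm comm,
                              PetscInt nRankRows, PetscInt nOverallRows,
                              const PetscInt *diagNnz, const PetscInt *offdiagNnz)
    {
      Mat A;
      MatCreateAIJ(comm,
                   nRankRows, nRankRows,        // local rows and columns
                   nOverallRows, nOverallRows,  // global rows and columns
                   0, diagNnz,                  // per-row nnz, diagonal block
                   0, offdiagNnz,               // per-row nnz, off-diagonal block
                   &A);
      // An under-estimate should only cost extra mallocs, not abort assembly.
      MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
      return A;
    }
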

diff --git a/AMDiS/src/parallel/MeshDistributor.cc b/AMDiS/src/parallel/MeshDistributor.cc
index 08150a64..8dfafe07 100644
--- a/AMDiS/src/parallel/MeshDistributor.cc
+++ b/AMDiS/src/parallel/MeshDistributor.cc
@@ -85,6 +85,7 @@ namespace AMDiS {
       repartitionIthChange(20),
       nMeshChangesAfterLastRepartitioning(0),
       repartitioningCounter(0),
+      repartitioningFailed(0),
       debugOutputDir(""),
       lastMeshChangeIndex(0),
       createBoundaryDofFlag(0),
@@ -1006,9 +1007,13 @@ namespace AMDiS {
     INFO(info, 8)("Parallel mesh adaption needed %.5f seconds\n", 
 		  MPI::Wtime() - first);
 
-    if (tryRepartition &&
-	repartitioningAllowed && 
-	nMeshChangesAfterLastRepartitioning >= repartitionIthChange) {
+    if (repartitioningFailed > 0) {
+      MSG_DBG("Repartitioning not tried because it has failed in the past!\n");
+
+      repartitioningFailed--;
+    } else if (tryRepartition &&
+	       repartitioningAllowed && 
+	       nMeshChangesAfterLastRepartitioning >= repartitionIthChange) {
       repartitionMesh();
       nMeshChangesAfterLastRepartitioning = 0;
     } else {
@@ -1314,6 +1319,7 @@ namespace AMDiS {
     bool partitioningSucceed = 
       partitioner->partition(elemWeights, ADAPTIVE_REPART);
     if (!partitioningSucceed) {
+      repartitioningFailed = 20;
       MSG("Mesh partitioner created empty partition!\n");
       return;
     }
@@ -1321,8 +1327,9 @@ namespace AMDiS {
     // In the case the partitioner does not create a new mesh partition, return
     // without any changes.
     if (!partitioner->meshChanged()) {
+      repartitioningFailed = 20;
       MSG("Mesh partition does not create a new partition!\n");
-	return;
+      return;
     }
 
     TEST_EXIT_DBG(!(partitioner->getSendElements().size() == mesh->getMacroElements().size() && 
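
The repartitioning change above amounts to a simple backoff: a failed attempt sets repartitioningFailed to 20, and each subsequent mesh change decrements the counter before repartitioning is tried again. A condensed, standalone sketch of this control flow; the helper is illustrative, the real members live in MeshDistributor, and the actual check also involves tryRepartition and repartitioningAllowed:

    // Condensed sketch of the repartitioning backoff added above
    // (illustrative names, not the actual MeshDistributor interface).
    struct RepartitionGuard
    {
      RepartitionGuard()
        : repartitioningFailed(0),
          nChangesSinceLast(0),
          repartitionIthChange(20)
      {}

      // Returns true if a repartitioning should be attempted for this mesh change.
      bool shouldRepartition()
      {
        if (repartitioningFailed > 0) {
          // A recent attempt failed: skip this one and shorten the cooldown.
          repartitioningFailed--;
          return false;
        }
        if (nChangesSinceLast >= repartitionIthChange) {
          nChangesSinceLast = 0;
          return true;
        }
        nChangesSinceLast++;
        return false;
      }

      // Call when the partitioner returned an empty or unchanged partition.
      void markFailed() { repartitioningFailed = 20; }

      int repartitioningFailed;   // cooldown after a failed attempt
      int nChangesSinceLast;      // mesh changes since the last repartitioning
      int repartitionIthChange;   // repartition every i-th mesh change
    };
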
diff --git a/AMDiS/src/parallel/MeshDistributor.h b/AMDiS/src/parallel/MeshDistributor.h
index 4878f469..f47d378d 100644
--- a/AMDiS/src/parallel/MeshDistributor.h
+++ b/AMDiS/src/parallel/MeshDistributor.h
@@ -527,6 +527,11 @@ namespace AMDiS {
     /// variable is used only for debug outputs.
     int repartitioningCounter;
 
+    /// If repartitioning of the mesh fails, this variable has a positive value
+    /// that gives the number of mesh changes the mesh distributor will wait
+    /// before trying a new mesh repartitioning.
+    int repartitioningFailed;
+
     /// Directory name where all debug output files should be written to.
     string debugOutputDir;
 
diff --git a/AMDiS/src/parallel/ParMetisPartitioner.h b/AMDiS/src/parallel/ParMetisPartitioner.h
index 445d1667..210e3738 100644
--- a/AMDiS/src/parallel/ParMetisPartitioner.h
+++ b/AMDiS/src/parallel/ParMetisPartitioner.h
@@ -171,7 +171,7 @@ namespace AMDiS {
     ParMetisPartitioner(MPI::Intracomm *comm)
       : MeshPartitioner(comm),
         parMetisMesh(NULL),
-	itr(1000.0)
+	itr(1000000.0)
     {}
 
     ~ParMetisPartitioner();
diff --git a/AMDiS/src/parallel/ParallelDofMapping.h b/AMDiS/src/parallel/ParallelDofMapping.h
index c4ffdbda..32eea630 100644
--- a/AMDiS/src/parallel/ParallelDofMapping.h
+++ b/AMDiS/src/parallel/ParallelDofMapping.h
@@ -131,6 +131,25 @@ namespace AMDiS {
 
       return dofMap[d];
     }
+
+    /** \brief
+     * Searches the map for a given DOF. It does not fail if the DOF is not
+     * mapped by this mapping; in this case, it returns false. If the DOF is
+     * mapped, the mapped index is stored and the function returns true.
+     *
+     * \param[in]    dof     DOF index for which a mapping is searched.
+     * \param[out]   index   If the DOF is mapped, the mapped index is
+     *                       stored here.
+     */
+    inline bool find(DegreeOfFreedom dof, MultiIndex& index)
+    {
+      DofMap::iterator it = dofMap.find(dof);
+      if (it == dofMap.end())
+	return false;
+
+      index = it->second;
+      return true;
+    }
     
     /// Inserts a new DOF into the rank's mapping. The DOF is assumed to be owned by
     /// the rank.
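
The new find() allows probing the mapping without the failure path of operator[] for unmapped DOFs; the nnz routines below rely on it to skip rows and columns that are not part of the mapping being counted. A short sketch of the intended usage; the helper getGlobalIndex and the template parameter MapT are illustrative, while DegreeOfFreedom and MultiIndex are the AMDiS types used above:

    // Sketch: probe a DOF mapping before using it, as done in
    // createPetscNnzStructureWithCoarseSpace() below. MapT stands for the
    // per-FE-space map type that provides the new find() method.
    template <typename MapT>
    bool getGlobalIndex(MapT &map, DegreeOfFreedom dof, int &globalIdx)
    {
      MultiIndex index;
      if (!map.find(dof, index))    // DOF is not handled by this mapping: skip it
        return false;
      globalIdx = index.global;     // safe: the DOF is mapped
      return true;
    }
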
diff --git a/AMDiS/src/parallel/PetscSolverFeti.cc b/AMDiS/src/parallel/PetscSolverFeti.cc
index a95752f9..b1094666 100644
--- a/AMDiS/src/parallel/PetscSolverFeti.cc
+++ b/AMDiS/src/parallel/PetscSolverFeti.cc
@@ -269,11 +269,11 @@ namespace AMDiS {
 
       if (meshLevel == 0) {
 	subdomain->setMeshDistributor(meshDistributor, 
-					    mpiCommGlobal, mpiCommLocal);
+				      mpiCommGlobal, mpiCommLocal);
       } else {
 	subdomain->setMeshDistributor(meshDistributor, 
-					    levelData.getMpiComm(meshLevel - 1),
-					    levelData.getMpiComm(meshLevel));
+				      levelData.getMpiComm(meshLevel - 1),
+				      levelData.getMpiComm(meshLevel));
 	subdomain->setLevel(meshLevel);
       }
     }
@@ -417,12 +417,11 @@ namespace AMDiS {
 	("Should not happen!\n");
     }
 
-    // If multi level test, inform sub domain solver about coarse space.
     subdomain->setDofMapping(&localDofMap, &primalDofMap);
 
     if (printTimings) {
       timeCounter = MPI::Wtime() - timeCounter;
-      MSG("FETI-DP timing 01: %.5f seconds (creation of basic data structures)", 
+      MSG("FETI-DP timing 01: %.5f seconds (creation of basic data structures)\n", 
 	  timeCounter);
     }
   }
@@ -693,6 +692,7 @@ namespace AMDiS {
 		 lagrangeMap.getOverallDofs(), nGlobalOverallInterior,
 		 2, PETSC_NULL, 2, PETSC_NULL,
 		 &mat_lagrange);
+    MatSetOption(mat_lagrange, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
 
     // === Create for all duals the corresponding Lagrange constraints. On ===
     // === each rank we traverse all pairs (n, m) of ranks, with n < m,    ===
@@ -792,7 +792,9 @@ namespace AMDiS {
       MatCreateAIJ(mpiCommGlobal,
 		   nRowsRankB, nRowsRankPrimal, 
 		   nGlobalOverallInterior, nRowsOverallPrimal,
-		   30, PETSC_NULL, 30, PETSC_NULL, &matBPi);
+		   150, PETSC_NULL, 150, PETSC_NULL, &matBPi);
+      MatSetOption(matBPi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
+
       Mat matPrimal;
 
       PetscInt nCols;
@@ -1209,7 +1211,7 @@ namespace AMDiS {
     // === Create matrices for the FETI-DP method. ===
    
     double wtime = MPI::Wtime();
-
+  
     subdomain->fillPetscMatrix(mat);
 
     if (printTimings)
@@ -1225,23 +1227,30 @@ namespace AMDiS {
       int nRowsDual = dualDofMap.getRankDofs();
 
       MatCreateSeqAIJ(PETSC_COMM_SELF,
-		      nRowsDual, nRowsDual, 60, PETSC_NULL,
+		      nRowsDual, nRowsDual, 100, PETSC_NULL,
 		      &mat_duals_duals);
+      MatSetOption(mat_duals_duals, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
 
       if (fetiPreconditioner == FETI_DIRICHLET) {
 	int nRowsInterior = interiorDofMap.getRankDofs();
 
 	MatCreateSeqAIJ(PETSC_COMM_SELF,
-			nRowsInterior, nRowsInterior, 60, PETSC_NULL,
+			nRowsInterior, nRowsInterior, 100, PETSC_NULL,
 			&mat_interior_interior);
-	
+	MatSetOption(mat_interior_interior, 
+		     MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
+
 	MatCreateSeqAIJ(PETSC_COMM_SELF,
-			nRowsInterior, nRowsDual, 60, PETSC_NULL,
+			nRowsInterior, nRowsDual, 100, PETSC_NULL,
 			&mat_interior_duals);
+	MatSetOption(mat_interior_duals, 
+		     MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
 	
 	MatCreateSeqAIJ(PETSC_COMM_SELF,
-			nRowsDual, nRowsInterior, 60, PETSC_NULL,
+			nRowsDual, nRowsInterior, 100, PETSC_NULL,
 			&mat_duals_interior);
+	MatSetOption(mat_duals_interior, 
+		     MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
       }
     
       // === Prepare traverse of sequentially created matrices. ===
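
The FETI-DP matrices above keep fixed per-row estimates (now 100 and 150) but disable MAT_NEW_NONZERO_ALLOCATION_ERR, so an under-estimate only causes extra allocations during assembly instead of a PETSc error. A hedged sketch for checking after assembly whether such an estimate was large enough; the helper name is illustrative:

    #include <petscmat.h>

    // Sketch: report how well a matrix was preallocated. A nonzero "mallocs"
    // count means the nnz estimate was too small and PETSc had to reallocate.
    void reportPreallocation(Mat A, const char *name)
    {
      MatInfo info;
      MatGetInfo(A, MAT_LOCAL, &info);
      PetscPrintf(PETSC_COMM_SELF,
                  "%s: nnz allocated = %g, nnz used = %g, mallocs = %g\n",
                  name, info.nz_allocated, info.nz_used, info.mallocs);
    }
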
diff --git a/AMDiS/src/parallel/PetscSolverGlobalMatrix.cc b/AMDiS/src/parallel/PetscSolverGlobalMatrix.cc
index 48d55f1c..06552c19 100644
--- a/AMDiS/src/parallel/PetscSolverGlobalMatrix.cc
+++ b/AMDiS/src/parallel/PetscSolverGlobalMatrix.cc
@@ -33,31 +33,10 @@ namespace AMDiS {
     
     double wtime = MPI::Wtime();
 
-    
-    // === Check if mesh was changed and, in this case, recompute matrix ===
-    // === nnz structure and matrix indices.                             ===
-
-    int recvAllValues = 0;
-    int sendValue = 
-      static_cast<int>(meshDistributor->getLastMeshChangeIndex() != lastMeshNnz);
-    mpiCommGlobal.Allreduce(&sendValue, &recvAllValues, 1, MPI_INT, MPI_SUM);
+    // === If required, recompute non zero structure of the matrix. ===
 
-    if (!d_nnz || recvAllValues != 0 || alwaysCreateNnzStructure) {
-      vector<const FiniteElemSpace*> feSpaces = getFeSpaces(mat);
-      interiorMap->setComputeMatIndex(true, true);
-      interiorMap->update(feSpaces);
-
-      if (d_nnz) {
-	delete [] d_nnz;
-	d_nnz = NULL;
-	delete [] o_nnz;
-	o_nnz = NULL;
-      }
-
-      updateSubdomainData();
+    if (checkMeshChange(mat))
       createPetscNnzStructure(mat);
-      lastMeshNnz = meshDistributor->getLastMeshChangeIndex();
-    }
 
 
     // === Create PETSc vector (solution and a temporary vector). ===
@@ -72,7 +51,7 @@ namespace AMDiS {
 
     MatCreateAIJ(mpiCommGlobal, nRankRows, nRankRows, 
 		 nOverallRows, nOverallRows,
-		 0, d_nnz, 0, o_nnz, &matIntInt);
+		 0, matInteriorDiagNnz, 0, matInteriorOffdiagNnz, &matIntInt);
 
     MatSetOption(matIntInt, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
 
@@ -149,16 +128,34 @@ namespace AMDiS {
     int nRowsRankInterior = interiorMap->getRankDofs();
     int nRowsOverallInterior = interiorMap->getOverallDofs();
 
-    if (subdomainLevel == 0) {
+    // === If required, recompute non zero structure of the matrix. ===
+
+    bool localMatrix = (subdomainLevel == 0);
+    if (checkMeshChange(mat, localMatrix)) {
+      createPetscNnzStructureWithCoarseSpace(mat, 
+					     *interiorMap, 
+					     matInteriorDiagNnz,
+					     matInteriorOffdiagNnz,
+					     localMatrix);
+
+      if (coarseSpaceMap)
+	createPetscNnzStructureWithCoarseSpace(mat, 
+					       *coarseSpaceMap, 
+					       matCoarseDiagNnz,
+					       matCoarseOffdiagNnz);	
+    }
+
+    if (localMatrix) {
       MatCreateSeqAIJ(mpiCommLocal, nRowsRankInterior, nRowsRankInterior,
-		      60, PETSC_NULL, &matIntInt);
+		      0, matInteriorDiagNnz, &matIntInt);
     } else {
       MatCreateAIJ(mpiCommLocal, 
 		   nRowsRankInterior, nRowsRankInterior,
 		   nRowsOverallInterior, nRowsOverallInterior,
-		   60, PETSC_NULL, 60, PETSC_NULL, &matIntInt);
+		   0, matInteriorDiagNnz, 0, matInteriorOffdiagNnz, 
+		   &matIntInt);
     }
-
+    
     if (coarseSpaceMap) {
       int nRowsRankCoarse = coarseSpaceMap->getRankDofs();
       int nRowsOverallCoarse = coarseSpaceMap->getOverallDofs();
@@ -166,17 +163,23 @@ namespace AMDiS {
       MatCreateAIJ(mpiCommGlobal,
 		   nRowsRankCoarse, nRowsRankCoarse,
 		   nRowsOverallCoarse, nRowsOverallCoarse,
-		   60, PETSC_NULL, 60, PETSC_NULL, &matCoarseCoarse);
-      
+		   0, matCoarseDiagNnz, 0, matCoarseOffdiagNnz, 
+		   &matCoarseCoarse);
+
       MatCreateAIJ(mpiCommGlobal,
 		   nRowsRankCoarse, nRowsRankInterior,
 		   nRowsOverallCoarse, nGlobalOverallInterior,
-		   60, PETSC_NULL, 60, PETSC_NULL, &matCoarseInt);
-      
+		   100, PETSC_NULL, 100, PETSC_NULL,
+		   &matCoarseInt);
+      MatSetOption(matCoarseInt, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
+
       MatCreateAIJ(mpiCommGlobal,
 		   nRowsRankInterior, nRowsRankCoarse,
 		   nGlobalOverallInterior, nRowsOverallCoarse,
-		   60, PETSC_NULL, 60, PETSC_NULL, &matIntCoarse);
+		   100, PETSC_NULL, 100, PETSC_NULL,
+		   //	   0, matInteriorDiagNnz, 0, matCoarseOffdiagNnz,
+		   &matIntCoarse);
+      MatSetOption(matIntCoarse, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
     }
 
     // === Prepare traverse of sequentially created matrices. ===
@@ -564,6 +567,52 @@ namespace AMDiS {
   }
 
 
+  bool PetscSolverGlobalMatrix::checkMeshChange(Matrix<DOFMatrix*> *mat,
+						bool localMatrix)
+  {
+    FUNCNAME("PetscSolverGlobalMatrix::checkMeshChange()");
+
+    int recvAllValues = 0;
+    int sendValue = 
+      static_cast<int>(meshDistributor->getLastMeshChangeIndex() != lastMeshNnz);
+    mpiCommGlobal.Allreduce(&sendValue, &recvAllValues, 1, MPI_INT, MPI_SUM);
+
+    if (!matInteriorDiagNnz || recvAllValues != 0 || alwaysCreateNnzStructure) {
+      vector<const FiniteElemSpace*> feSpaces = getFeSpaces(mat);
+      
+      interiorMap->setComputeMatIndex(true, !localMatrix);
+      interiorMap->update(feSpaces);
+
+      if (matInteriorDiagNnz) {
+	delete [] matInteriorDiagNnz;
+	matInteriorDiagNnz = NULL;
+      }
+      
+      if (matInteriorOffdiagNnz) {
+	delete [] matInteriorOffdiagNnz;
+	matInteriorOffdiagNnz = NULL;
+      }
+
+      if (matCoarseDiagNnz) {
+	delete [] matCoarseDiagNnz;
+	matCoarseDiagNnz = NULL;
+      }
+      
+      if (matCoarseOffdiagNnz) {
+	delete [] matCoarseOffdiagNnz;
+	matCoarseOffdiagNnz = NULL;
+      }
+
+      updateSubdomainData();
+      lastMeshNnz = meshDistributor->getLastMeshChangeIndex();
+
+      return true;
+    }
+
+    return false;
+  }
+
+
   void PetscSolverGlobalMatrix::createFieldSplit(PC pc)
   {
     FUNCNAME("PetscSolverGlobalMatrix::createFieldSplit()");
@@ -985,18 +1034,17 @@ namespace AMDiS {
   {
     FUNCNAME("PetscSolverGlobalMatrix::createPetscNnzStructure()");
 
-    TEST_EXIT_DBG(!d_nnz)("There is something wrong!\n");
-    TEST_EXIT_DBG(!o_nnz)("There is something wrong!\n");
+    TEST_EXIT_DBG(!matInteriorDiagNnz)("There is something wrong!\n");
 
     vector<const FiniteElemSpace*> feSpaces = getFeSpaces(mat);
     int nRankRows = interiorMap->getRankDofs();
     int rankStartIndex = interiorMap->getStartDofs();
 
-    d_nnz = new int[nRankRows];
-    o_nnz = new int[nRankRows];
+    matInteriorDiagNnz = new int[nRankRows];
+    matInteriorOffdiagNnz = new int[nRankRows];
     for (int i = 0; i < nRankRows; i++) {
-      d_nnz[i] = 0;
-      o_nnz[i] = 0;
+      matInteriorDiagNnz[i] = 0;
+      matInteriorOffdiagNnz[i] = 0;
     }
 
     using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end;
@@ -1091,13 +1139,13 @@ namespace AMDiS {
 	      
 	      if (value(*icursor) != 0.0 || petscRowIdx == petscColIdx) {
 		// The row DOF is a rank DOF, if also the column is a rank DOF, 
-		// increment the d_nnz values for this row, otherwise the 
-		// o_nnz value.
+		// increment the matInteriorDiagNnz value for this row,
+		// otherwise the matInteriorOffdiagNnz value.
 		if (petscColIdx >= rankStartIndex && 
 		    petscColIdx < rankStartIndex + nRankRows)
-		  d_nnz[localPetscRowIdx]++;
+		  matInteriorDiagNnz[localPetscRowIdx]++;
 		else
-		  o_nnz[localPetscRowIdx]++;
+		  matInteriorOffdiagNnz[localPetscRowIdx]++;
 	      }    
 	    }
 	  } else {
@@ -1154,9 +1202,9 @@ namespace AMDiS {
 	     r, localRowIdx, nRankRows, it->first);
 	  
 	  if (c < rankStartIndex || c >= rankStartIndex + nRankRows)
-	    o_nnz[localRowIdx]++;
+	    matInteriorOffdiagNnz[localRowIdx]++;
 	  else
-	    d_nnz[localRowIdx]++;
+	    matInteriorDiagNnz[localRowIdx]++;
 	}
       }
     }
@@ -1169,7 +1217,254 @@ namespace AMDiS {
 
     if (nRankRows < 100) 
       for (int i = 0; i < nRankRows; i++)
-	d_nnz[i] = std::min(d_nnz[i], nRankRows);
+	matInteriorDiagNnz[i] = std::min(matInteriorDiagNnz[i], nRankRows);
   }
 
+
+  void PetscSolverGlobalMatrix::createPetscNnzStructureWithCoarseSpace(Matrix<DOFMatrix*> *mat,
+								       ParallelDofMapping &dofMap,
+								       int*& diagNnz, 
+								       int*& offdiagNnz,
+								       bool localMatrix)
+  {
+    FUNCNAME("PetscSolverGlobalMatrix::createPetscNnzStructureWithCoarseSpace()");
+
+    TEST_EXIT_DBG(!diagNnz)("There is something wrong!\n");
+
+    vector<const FiniteElemSpace*> feSpaces = getFeSpaces(mat);
+    int nRankRows = dofMap.getRankDofs();
+    int rankStartIndex = dofMap.getStartDofs();
+
+    diagNnz = new int[nRankRows];
+    for (int i = 0; i < nRankRows; i++)
+      diagNnz[i] = 0;
+
+    if (localMatrix == false) {
+      offdiagNnz = new int[nRankRows];
+      for (int i = 0; i < nRankRows; i++)
+	offdiagNnz[i] = 0;
+    }
+
+    using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end;
+    namespace traits = mtl::traits;
+    typedef DOFMatrix::base_matrix_type Matrix;
+    typedef vector<pair<int, int> > MatrixNnzEntry;
+    typedef map<int, DofContainer> RankToDofContainer;
+
+    // Stores, for each rank, a list of nnz entries (i.e. pairs of row and column
+    // index) that this rank will send to it. These nnz entries are assembled
+    // on this rank, but because the row DOFs are not DOFs of this rank, they
+    // will be sent to the owner of the row DOFs.
+    map<int, MatrixNnzEntry> sendMatrixEntry;
+
+    // Maps each DOF that must be sent to another rank to the rank number of
+    // the receiving rank.
+    map<pair<DegreeOfFreedom, int>, int> sendDofToRank;
+
+
+    // First, create for each rank to which we send data a MatrixNnzEntry
+    // object with 0 entries.
+    for (unsigned int i = 0; i < feSpaces.size(); i++) {
+      for (DofComm::Iterator it(meshDistributor->getDofComm().getRecvDofs(), feSpaces[i]);
+	   !it.end(); it.nextRank()) {
+	sendMatrixEntry[it.getRank()].resize(0);
+	
+	for (; !it.endDofIter(); it.nextDof())
+	  sendDofToRank[make_pair(it.getDofIndex(), i)] = it.getRank();
+      }
+    }
+
+    // Create the list of ranks from which we receive data.
+    std::set<int> recvFromRank;
+    for (unsigned int i = 0; i < feSpaces.size(); i++) 
+      for (DofComm::Iterator it(meshDistributor->getDofComm().getSendDofs(), feSpaces[i]);
+	   !it.end(); it.nextRank())
+	recvFromRank.insert(it.getRank());
+
+
+    // === Traverse matrices to create nnz data. ===
+
+    int nComponents = mat->getNumRows();
+    for (int i = 0; i < nComponents; i++) {
+      for (int j = 0; j < nComponents; j++) {
+ 	if (!(*mat)[i][j])
+	  continue;
+
+	TEST_EXIT_DBG((*mat)[i][j]->getRowFeSpace() == feSpaces[i])
+	  ("Should not happen!\n");
+	TEST_EXIT_DBG((*mat)[i][j]->getColFeSpace() == feSpaces[j])
+	  ("Should not happen!\n");
+
+	Matrix bmat = (*mat)[i][j]->getBaseMatrix();
+
+	traits::col<Matrix>::type col(bmat);
+	traits::const_value<Matrix>::type value(bmat);
+	  
+	typedef traits::range_generator<row, Matrix>::type cursor_type;
+	typedef traits::range_generator<nz, cursor_type>::type icursor_type;
+
+	MultiIndex rowDofIndex;
+	
+	for (cursor_type cursor = begin<row>(bmat), 
+	       cend = end<row>(bmat); cursor != cend; ++cursor) {
+
+	  if (dofMap[feSpaces[i]].find(*cursor, rowDofIndex) == false)
+	    continue;
+
+	  // The corresponding global matrix row index of the current row DOF.
+	  int petscRowIdx = 0;
+	  if (localMatrix) {
+	    petscRowIdx = dofMap.getLocalMatIndex(i, *cursor);
+	  } else {
+	    if (dofMap.isMatIndexFromGlobal())
+	      petscRowIdx = dofMap.getMatIndex(i, rowDofIndex.global);
+	    else
+	      petscRowIdx = dofMap.getMatIndex(i, *cursor);
+	  }
+
+	  if (localMatrix || dofMap[feSpaces[i]].isRankDof(*cursor)) {
+    	    
+	    // === The current row DOF is a rank DOF, so create the       ===
+	    // === corresponding nnz values directly on rank's nnz data.  ===
+	    
+	    // This is the local row index of the local PETSc matrix.
+	    int localPetscRowIdx = petscRowIdx;
+
+	    if (localMatrix == false) {
+	      localPetscRowIdx -= rankStartIndex;
+	    
+	      TEST_EXIT_DBG(localPetscRowIdx >= 0 && localPetscRowIdx < nRankRows)
+		("Should not happen!\n Debug info: DOF = %d   globalRowIdx = %d   petscRowIdx = %d   localPetscRowIdx = %d   rStart = %d   component = %d of %d   nRankRows = %d\n",
+		 *cursor,
+		 dofMap[feSpaces[i]][*cursor].global,
+		 petscRowIdx, 
+		 localPetscRowIdx, 
+		 rankStartIndex,
+		 i,
+		 nComponents, 
+		 nRankRows);
+	    }
+	    
+
+	    if (localMatrix) {
+	      for (icursor_type icursor = begin<nz>(cursor), 
+		     icend = end<nz>(cursor); icursor != icend; ++icursor)
+		if (dofMap[feSpaces[j]].isSet(col(*icursor)))
+		  diagNnz[localPetscRowIdx]++;	      
+	    } else {
+	      MultiIndex colDofIndex;
+
+	      // Traverse all non zero entries in this row.
+	      for (icursor_type icursor = begin<nz>(cursor), 
+		     icend = end<nz>(cursor); icursor != icend; ++icursor) {
+		if (dofMap[feSpaces[j]].find(col(*icursor), colDofIndex) == false)
+		  continue;
+		
+		int petscColIdx = (dofMap.isMatIndexFromGlobal() ? 
+				   dofMap.getMatIndex(j, colDofIndex.global) :
+				   dofMap.getMatIndex(j, col(*icursor)));
+		
+		if (value(*icursor) != 0.0 || petscRowIdx == petscColIdx) {
+		  // The row DOF is a rank DOF; if the column is also a rank DOF,
+		  // increment the diagNnz value for this row, otherwise the
+		  // offdiagNnz value.
+		  if (petscColIdx >= rankStartIndex && 
+		      petscColIdx < rankStartIndex + nRankRows)
+		    diagNnz[localPetscRowIdx]++;
+		  else
+		    offdiagNnz[localPetscRowIdx]++;
+		}    
+	      }
+	    }
+	  } else {
+	    // === The current row DOF is not a rank DOF, i.e., its values    ===
+	    // === are also created on this rank, but afterwards they will    ===
+	    // === be sent to another rank. So we also need to send the       ===
+	    // === corresponding nnz structure of this row to the             ===
+	    // === corresponding rank.                                        ===
+	    
+	    // Send all non zero entries to the owner of the row DOF.
+	    int sendToRank = sendDofToRank[make_pair(*cursor, i)];
+	    MultiIndex colDofIndex;
+
+	    for (icursor_type icursor = begin<nz>(cursor), 
+		   icend = end<nz>(cursor); icursor != icend; ++icursor) {
+	      if (dofMap[feSpaces[j]].find(col(*icursor), colDofIndex) == false)
+		continue;
+
+	      if (value(*icursor) != 0.0) {
+		int petscColIdx = (dofMap.isMatIndexFromGlobal() ? 
+				   dofMap.getMatIndex(j, colDofIndex.global) :
+				   dofMap.getMatIndex(j, col(*icursor)));
+		
+		sendMatrixEntry[sendToRank].
+		  push_back(make_pair(petscRowIdx, petscColIdx));
+	      }
+	    }
+	    
+	  } // if (isRankDof[*cursor]) ... else ...
+	} // for each row in mat[i][j]
+      } 
+    }
+
+    if (localMatrix == false) {
+      // === Send and recv the nnz row structure to/from other ranks. ===
+      
+      StdMpi<MatrixNnzEntry> stdMpi(mpiCommGlobal, true);
+      stdMpi.send(sendMatrixEntry);
+      for (std::set<int>::iterator it = recvFromRank.begin(); 
+	   it != recvFromRank.end(); ++it)
+	stdMpi.recv(*it);
+      stdMpi.startCommunication();
+      
+      
+      // === Evaluate the nnz structure this rank got from other ranks and add ===
+      // === it to the PETSc nnz data structure.                               ===
+      
+      for (map<int, MatrixNnzEntry>::iterator it = stdMpi.getRecvData().begin();
+	   it != stdMpi.getRecvData().end(); ++it) {
+	if (it->second.size() > 0) {
+	  for (unsigned int i = 0; i < it->second.size(); i++) {
+	    int r = it->second[i].first;
+	    int c = it->second[i].second;
+	    
+	    int localRowIdx = r - rankStartIndex;
+	    
+	    TEST_EXIT_DBG(localRowIdx >= 0 && localRowIdx < nRankRows)
+	      ("Got row index %d/%d (nRankRows = %d) from rank %d. Should not happen!\n",
+	       r, localRowIdx, nRankRows, it->first);
+	    
+	    if (c < rankStartIndex || c >= rankStartIndex + nRankRows)
+	      offdiagNnz[localRowIdx]++;
+	    else
+	      diagNnz[localRowIdx]++;
+	  }
+	}
+      }
+
+      // The above algorithm for calculating the number of nnz per row over-
+      // approximates the value, i.e., the number is always equal to or larger
+      // than the real number of nnz values in the global parallel matrix. For
+      // small matrices, the problem may arise that the result is larger than
+      // the number of entries in a row. This is fixed in the following.
+      
+      if (nRankRows < 100) 
+	for (int i = 0; i < nRankRows; i++)
+	  diagNnz[i] = std::min(diagNnz[i], nRankRows);
+    }
+
+
+#if (DEBUG != 0)    
+    int nMax = 0; 
+    int nSum = 0;
+    for (int i = 0; i < nRankRows; i++) {
+      nMax = std::max(nMax, diagNnz[i]);
+      nSum += diagNnz[i];
+    }
+
+    MSG("NNZ in matrix: max = %d, avrg = %.0f\n",
+	nMax, static_cast<double>(nSum) / nRankRows);
+#endif
+
+  }
 }
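
Both nnz routines reduce to the same classification step: for a locally owned row, a column index is counted in the diagonal block if it lies in this rank's ownership range, otherwise in the off-diagonal block. A minimal sketch of that step, assuming the contiguous range [rankStartIndex, rankStartIndex + nRankRows) used above; the helper name is illustrative:

    // Sketch: classify one entry of a locally owned row for preallocation.
    inline void countNnzEntry(int petscRowIdx, int petscColIdx,
                              int rankStartIndex, int nRankRows,
                              int *diagNnz, int *offdiagNnz)
    {
      int localRow = petscRowIdx - rankStartIndex;   // local row index
      if (petscColIdx >= rankStartIndex &&
          petscColIdx < rankStartIndex + nRankRows)
        diagNnz[localRow]++;       // column owned by this rank (diagonal block)
      else
        offdiagNnz[localRow]++;    // column owned by another rank (off-diagonal)
    }
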
diff --git a/AMDiS/src/parallel/PetscSolverGlobalMatrix.h b/AMDiS/src/parallel/PetscSolverGlobalMatrix.h
index e3efdf02..63b2fc47 100644
--- a/AMDiS/src/parallel/PetscSolverGlobalMatrix.h
+++ b/AMDiS/src/parallel/PetscSolverGlobalMatrix.h
@@ -37,8 +37,10 @@ namespace AMDiS {
     PetscSolverGlobalMatrix()
       : PetscSolver(),
 	petscSolVec(PETSC_NULL),
-	d_nnz(NULL),
-	o_nnz(NULL),
+	matInteriorDiagNnz(NULL),
+	matInteriorOffdiagNnz(NULL),
+	matCoarseDiagNnz(NULL),
+	matCoarseOffdiagNnz(NULL),
 	lastMeshNnz(0),
 	zeroStartVector(false),
 	alwaysCreateNnzStructure(false)
@@ -67,6 +69,8 @@ namespace AMDiS {
     void destroyVectorData();
 
   protected:
+    bool checkMeshChange(Matrix<DOFMatrix*> *mat, bool localMatrix = false);
+
     void createFieldSplit(PC pc);
 
     virtual void initPreconditioner(PC pc);
@@ -75,6 +79,12 @@ namespace AMDiS {
 
     /// Creates a new non zero pattern structure for the PETSc matrix. 
     void createPetscNnzStructure(Matrix<DOFMatrix*> *mat);
+
+    void createPetscNnzStructureWithCoarseSpace(Matrix<DOFMatrix*> *mat,
+						ParallelDofMapping &dofMap,
+						int*& diagNnz, 
+						int*& offdiagNnz,
+						bool localMatrix = false);
     
     /// Takes a DOF matrix and sends the values to the global PETSc matrix.
     void setDofMatrix(DOFMatrix* mat, int nRowMat = 0, int nColMat = 0);
@@ -98,8 +108,11 @@ namespace AMDiS {
 
 
   protected:
-    /// Arrays definig the non zero pattern of Petsc's matrix.
-    int *d_nnz, *o_nnz;
+    /// Arrays defining the non zero pattern of PETSc's interior matrix.
+    int *matInteriorDiagNnz, *matInteriorOffdiagNnz;
+
+    /// Arrays defining the non zero pattern of PETSc's coarse space matrix.
+    int *matCoarseDiagNnz, *matCoarseOffdiagNnz;
 
     Vec petscSolVec;
 
-- 
GitLab