diff --git a/AMDiS/src/io/ElementFileWriter.cc b/AMDiS/src/io/ElementFileWriter.cc
index 896fe9ce691cd9719e09110fa73416a58648e45d..2806f919669c1fde21ae6c96be3cf698315e8232 100644
--- a/AMDiS/src/io/ElementFileWriter.cc
+++ b/AMDiS/src/io/ElementFileWriter.cc
@@ -285,7 +285,8 @@ namespace AMDiS {
   {
     FUNCNAME("ElementFileWriter::writeVtkValues()");
 
-    TEST_EXIT((vec!=NULL || vecs!=NULL) && (vec==NULL || vecs==NULL))("Ether vec or vecs must be given, not both and not nothing!");
+    TEST_EXIT((vec!=NULL || vecs!=NULL) && (vec==NULL || vecs==NULL))
+      ("Ether vec or vecs must be given, not both and not nothing!");
 
 #if HAVE_PARALLEL_DOMAIN_AMDIS
     string filename = 
diff --git a/AMDiS/src/parallel/DofComm.cc b/AMDiS/src/parallel/DofComm.cc
index ba0207911b09701aca08c837eb996816a71bf1f2..4ed21232f3f4d1792297aa308edef369f18f9135 100644
--- a/AMDiS/src/parallel/DofComm.cc
+++ b/AMDiS/src/parallel/DofComm.cc
@@ -111,6 +111,33 @@ namespace AMDiS {
   }
   
 
+  int DofComm::getDegree(const FiniteElemSpace *feSpace, 
+			 const DegreeOfFreedom *dof)
+  {
+    FUNCNAME("DofComm::getDegree()");
+
+    TEST_EXIT_DBG(meshLevel == 0)("Not yet implemented!\n");
+
+    int degree = 0;
+
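+    // Count in how many rank-wise send and receive lists the given DOF
+    // appears for this FE space.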
+    for (map<int, FeMapType>::iterator it = sendDofs[0].begin();
+	 it != sendDofs[0].end(); ++it) {
+      DofContainer &dc = it->second[feSpace];
+      if (find(dc.begin(), dc.end(), dof) != dc.end())
+	degree++;
+    }
+
+    for (map<int, FeMapType>::iterator it = recvDofs[0].begin();
+	 it != recvDofs[0].end(); ++it) {
+      DofContainer &dc = it->second[feSpace];
+      if (find(dc.begin(), dc.end(), dof) != dc.end())
+	degree++;
+    }
+
+    return degree;
+  }
+
+
   bool DofComm::Iterator::setNextFeMap()
   {
     FUNCNAME("DofComm::Iterator::setNextFeMap()");
diff --git a/AMDiS/src/parallel/DofComm.h b/AMDiS/src/parallel/DofComm.h
index 42c98e48a08895e41cff32732b56e2b6703b593f..28e97af4b639b26be17ada98df1bdb6ec6fe939f 100644
--- a/AMDiS/src/parallel/DofComm.h
+++ b/AMDiS/src/parallel/DofComm.h
@@ -70,6 +70,9 @@ namespace AMDiS {
 		      const FiniteElemSpace *feSpace,
 		      bool countDouble = false);
 
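+    /// Returns the number of ranks whose (level-0) send or receive lists
+    /// contain the given DOF of the given FE space.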
+    int getDegree(const FiniteElemSpace *feSpace, 
+		  const DegreeOfFreedom *dof);
+
   protected:
     void createContainer(RankToBoundMap &boundary, 
 			 DataType &data);
diff --git a/AMDiS/src/parallel/MeshDistributor.cc b/AMDiS/src/parallel/MeshDistributor.cc
index 0c65e0ac3bdee220bda8c394473e648c895de294..fbdd1794b5f28be5cf86ac37a055775dfa34e61c 100644
--- a/AMDiS/src/parallel/MeshDistributor.cc
+++ b/AMDiS/src/parallel/MeshDistributor.cc
@@ -85,14 +85,14 @@ namespace AMDiS {
       nMeshChangesAfterLastRepartitioning(0),
       repartitioningCounter(0),
       repartitioningFailed(0),
-
       debugOutputDir(""),
       lastMeshChangeIndex(0),
       createBoundaryDofFlag(0),
       boundaryDofInfo(1),
       meshAdaptivity(true),
       hasPeriodicBoundary(false),
-      printTimings(false)
+      printTimings(false),
+      printMemoryUsage(false)
   {
     FUNCNAME("MeshDistributor::MeshDistributor()");
 
@@ -130,6 +130,7 @@ namespace AMDiS {
     partitioner->setBoxPartitioning(static_cast<bool>(tmp));
 
     Parameters::get(name + "->print timings", printTimings);
+    Parameters::get(name + "->print memory usage", printMemoryUsage);
 
     TEST_EXIT(partitioner)("Could not create partitioner \"%s\"!\n", partStr.c_str());
 
@@ -1224,8 +1225,6 @@ namespace AMDiS {
       
     map<int, MeshCodeVec> sendCodes;
    
-    double first = MPI::Wtime();
-
     for (RankToBoundMap::iterator it = allBound.begin(); 
 	 it != allBound.end(); ++it) {
       for (vector<AtomicBoundary>::iterator boundIt = it->second.begin();
@@ -1244,8 +1243,6 @@ namespace AMDiS {
 
     stdMpi.startCommunication();
 
-    MSG("        -> communicate codes needed %.5f seconds\n", MPI::Wtime() - first);
-    first = MPI::Wtime();
 
     // === Compare received mesh structure codes. ===
     
@@ -1281,8 +1278,6 @@ namespace AMDiS {
       }
     }
 
-    MSG("        -> fitElementToMeshCode needed %.5f seconds\n", MPI::Wtime() - first);
-
     return meshChanged;
   }
 
@@ -1693,6 +1688,15 @@ namespace AMDiS {
       elObjDb.create(partitionMap, levelData);
     elObjDb.updateRankData();
 
+
+    // === If requested, calculate and print memory usage of element  ===
+    // === object database.                                           ===
+    if (printMemoryUsage && firstCall) {
+      unsigned long memsize = elObjDb.calculateMemoryUsage();
+      MSG("Memory usage of element object database = %5.f KByte\n", 
+	  static_cast<double>(memsize) / 1024.0);
+    }
+
     
     intBoundary.create(levelData, elObjDb);
 
@@ -1876,7 +1880,6 @@ namespace AMDiS {
       ParallelDebug::testCommonDofs(*this, true);
       ParallelDebug::testGlobalIndexByCoords(*this);   
     }
-
 #else
 
     for (int i = 0; i < static_cast<int>(dofMaps.size()); i++) {
diff --git a/AMDiS/src/parallel/MeshDistributor.h b/AMDiS/src/parallel/MeshDistributor.h
index 4d36f6a43a5351fdae71c1fb62a389dfcd1c4e3d..e0884ef78ba1e6e77a7379f0307a6eeb5a656932 100644
--- a/AMDiS/src/parallel/MeshDistributor.h
+++ b/AMDiS/src/parallel/MeshDistributor.h
@@ -606,6 +606,9 @@ namespace AMDiS {
     /// If true, detailed timings for benchmarking are printed.
     bool printTimings;
 
+    /// If true, detailed information about memory usage is printed.
+    bool printMemoryUsage;
+
   public:
     /// The boundary DOFs are sorted by subobject entities, i.e., first all
     /// face DOFs, edge DOFs and to the last vertex DOFs will be set to
diff --git a/AMDiS/src/parallel/ParallelDebug.cc b/AMDiS/src/parallel/ParallelDebug.cc
index a0440abbad7ac3fefcc59d60d103f0e20f73af12..1aaf5022f71bd0f1b1016ef824f9f3963b2506cd 100644
--- a/AMDiS/src/parallel/ParallelDebug.cc
+++ b/AMDiS/src/parallel/ParallelDebug.cc
@@ -875,7 +875,7 @@ namespace AMDiS {
     ElInfo *elInfo = stack.traverseFirst(pdb.mesh, -1, Mesh::CALL_LEAF_EL);
     
     while (elInfo) {		  
-      int index = elInfo->getElement()->getIndex();
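+      // partitionMap is indexed by macro element indices, so the macro
+      // element index must be used here, not the index of the leaf element.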
+      int index = elInfo->getMacroElement()->getIndex();
       vec[index] = pdb.partitionMap[index];
       elInfo = stack.traverseNext(elInfo);
     }
diff --git a/AMDiS/src/parallel/PetscHelper.cc b/AMDiS/src/parallel/PetscHelper.cc
index 8970f73f3d823925bbb8ebde94a0f6de68b1eb4a..b8195215dd788f5299ee675d6c63a19f74c2851f 100644
--- a/AMDiS/src/parallel/PetscHelper.cc
+++ b/AMDiS/src/parallel/PetscHelper.cc
@@ -76,28 +76,45 @@ namespace AMDiS {
 
     void blockMatMatSolve(MPI::Intracomm mpiComm, KSP ksp, Mat mat0, Mat &mat1)
     {
+      FUNCNAME("blockMatMatSolve()");
+
       // === We have to  calculate mat1 = ksp mat0:                       ===
       // ===    - get all local column vectors from mat0                  ===
       // ===    - solve with ksp for this column vector as the rhs vector ===
       // ===    - set the result to the corresponding column vector of    ===
       // ===      matrix mat1                                             ===
       
+      // Transform matrix mat0 into a sparse column format.
+      PetscMatCol mat0_cols;
+      getMatLocalColumn(mat0, mat0_cols);
+
+      int nFirstCol, nLastCol;
+      MatGetOwnershipRangeColumn(mat0, &nFirstCol, &nLastCol);
+
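+      // Count how many of the locally stored columns of mat0 fall into this
+      // rank's column ownership range (dnnz) and how many fall outside of it
+      // (onnz). These counts are used below to preallocate mat1 instead of
+      // the former fixed guess of 150 nonzeros per row.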
+      int dnnz = 0;
+      int onnz = 0;
+      for (PetscMatCol::iterator it = mat0_cols.begin(); 
+	   it != mat0_cols.end(); ++it) {
+	if (it->first >= nFirstCol && it->first < nLastCol)
+	  dnnz++;
+	else
+	  onnz++;
+      }
+
+
       PetscInt localRows, localCols, globalRows, globalCols;
       MatGetLocalSize(mat0, &localRows, &localCols);
       MatGetSize(mat0, &globalRows, &globalCols);
 
       MatCreateAIJ(mpiComm,
 		   localRows, localCols, globalRows, globalCols,
-		   150, PETSC_NULL, 150, PETSC_NULL, &mat1);
+		   dnnz, PETSC_NULL, onnz, PETSC_NULL, &mat1);
       MatSetOption(mat1, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
       
-      // Transform matrix mat0 into a sparse column format.
-      PetscMatCol mat0_cols;
-      getMatLocalColumn(mat0, mat0_cols);
       
       Vec tmpVec;
       VecCreateSeq(PETSC_COMM_SELF, localRows, &tmpVec);
-      
+
       // Solve for all column vectors of mat A_BPi and create matrix matK
       for (PetscMatCol::iterator it = mat0_cols.begin(); 
 	   it != mat0_cols.end(); ++it) {
@@ -107,7 +124,7 @@ namespace AMDiS {
       }
       
       VecDestroy(&tmpVec);
-      
+
       MatAssemblyBegin(mat1, MAT_FINAL_ASSEMBLY);
       MatAssemblyEnd(mat1, MAT_FINAL_ASSEMBLY);
     }
@@ -282,6 +299,7 @@ namespace AMDiS {
 		   PetscReal atol,
 		   PetscInt maxIt)
     {
+
       setSolverWithLu(ksp, kspPrefix, kspType, pcType, PETSC_NULL, 
 		      rtol, atol, maxIt);
     }
diff --git a/AMDiS/src/parallel/PetscProblemStat.cc b/AMDiS/src/parallel/PetscProblemStat.cc
index 0a21e9b290b920cd972e866476a12f03c57863f0..14fad3cc54d223e4138530737adac479ddd8ca0f 100644
--- a/AMDiS/src/parallel/PetscProblemStat.cc
+++ b/AMDiS/src/parallel/PetscProblemStat.cc
@@ -78,6 +78,7 @@ namespace AMDiS {
 
     TEST_EXIT(meshDistributor)("Should not happen!\n");
 
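+    // Synchronize all ranks before starting the timer, so that the measured
+    // time is not skewed by load imbalance from earlier phases.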
+    MPI::COMM_WORLD.Barrier();
     double wtime = MPI::Wtime();
 
     if (createMatrixData)
diff --git a/AMDiS/src/parallel/PetscSolverFeti.cc b/AMDiS/src/parallel/PetscSolverFeti.cc
index 67cc8cda23da6df1ccaa4317073b520131541c72..3941b7a6e9cc6daa5e6a0c66708328fe6ebb183d 100644
--- a/AMDiS/src/parallel/PetscSolverFeti.cc
+++ b/AMDiS/src/parallel/PetscSolverFeti.cc
@@ -356,7 +356,6 @@ namespace AMDiS {
       }
     }
 
-
     // === Calculate the number of primals that are owned by the rank and ===
     // === create local indices of the primals starting at zero.          ===
 
diff --git a/AMDiS/src/parallel/PetscSolverFetiDebug.cc b/AMDiS/src/parallel/PetscSolverFetiDebug.cc
index ca9aebbe17d365226e51694cb561440711f9835f..24b427da61ebd8066a56730d3f5061c3e5dd95fc 100644
--- a/AMDiS/src/parallel/PetscSolverFetiDebug.cc
+++ b/AMDiS/src/parallel/PetscSolverFetiDebug.cc
@@ -601,9 +601,10 @@ namespace AMDiS {
       
       if (MPI::COMM_WORLD.Get_rank() == 0) {
 	vector<WorldVector<double> > allPrimals;
-	
-	for (int i = 1; i < MPI::COMM_WORLD.Get_size(); i++) {
-	  vector<double> &recvData = stdMpi.getRecvData(i);
+
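+	// Iterate over the ranks from which data has actually been received,
+	// instead of assuming that every other rank has sent coordinates.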
+	for (map<int, vector<double> >::iterator it = stdMpi.getRecvData().begin();
+	     it != stdMpi.getRecvData().end(); ++it) {
+	  vector<double> &recvData = it->second;
 	  
 	  TEST_EXIT(recvData.size() % mesh->getDim() == 0)
 	    ("Wrong number of coordinates received!\n");
@@ -679,9 +680,10 @@ namespace AMDiS {
 
       if (MPI::COMM_WORLD.Get_rank() == 0) {
 	vector<WorldVector<double> > faceNodes;
-	
-	for (int i = 1; i < MPI::COMM_WORLD.Get_size(); i++) {
-	  vector<double> &recvData = stdMpi.getRecvData(i);
+
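+	// As above, iterate only over the ranks that actually sent data.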
+	for (map<int, vector<double> >::iterator it = stdMpi.getRecvData().begin();
+	     it != stdMpi.getRecvData().end(); ++it) {
+	  vector<double> &recvData = it->second;
 	  
 	  TEST_EXIT(recvData.size() % 9 == 0)
 	    ("Wrong number of coordinates received!\n");
diff --git a/AMDiS/src/parallel/PetscSolverGlobalMatrix.h b/AMDiS/src/parallel/PetscSolverGlobalMatrix.h
index 90a36b22d4b04da23e2b663b0cbeeb38ffab3804..4fd425e339196a828dce4db6afbe06e0db101a47 100644
--- a/AMDiS/src/parallel/PetscSolverGlobalMatrix.h
+++ b/AMDiS/src/parallel/PetscSolverGlobalMatrix.h
@@ -25,7 +25,6 @@
 
 #include <boost/tuple/tuple.hpp>
 #include "AMDiS_fwd.h"
-#include "parallel/MatrixNnzStructure.h"
 #include "parallel/PetscSolver.h"
 
 namespace AMDiS {
diff --git a/AMDiS/src/parallel/PetscSolverNavierStokes.cc b/AMDiS/src/parallel/PetscSolverNavierStokes.cc
index 428dad486b81d5659ee135ecd273f1494906d362..b55228134ec86e0360bdd5b9ccd06f0995536220 100644
--- a/AMDiS/src/parallel/PetscSolverNavierStokes.cc
+++ b/AMDiS/src/parallel/PetscSolverNavierStokes.cc
@@ -121,6 +121,9 @@ namespace AMDiS {
   {
     FUNCNAME("PetscSolverNavierStokes::initPreconditioner()");
 
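+    // Synchronized timer for the preconditioner setup; the elapsed time is
+    // reported at the end of this function.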
+    MPI::COMM_WORLD.Barrier();
+    double wtime = MPI::Wtime();
+
     TEST_EXIT(nu)("nu pointer not set!\n");
     TEST_EXIT(invTau)("invtau pointer not set!\n");
     TEST_EXIT(solution)("solution pointer not set!\n");
@@ -136,16 +139,17 @@ namespace AMDiS {
     PCSetType(pc, PCFIELDSPLIT);
     PCFieldSplitSetType(pc, PC_COMPOSITE_SCHUR);
     PCFieldSplitSetSchurFactType(pc, PC_FIELDSPLIT_SCHUR_FACT_FULL);
+
     createFieldSplit(pc, "velocity", velocityComponents);
     createFieldSplit(pc, "pressure", pressureComponent);
     PCSetFromOptions(pc);
 
-    KSPSetUp(kspInterior);
 
     KSP *subKsp;
     int nSubKsp;
     PCFieldSplitGetSubKSP(pc, &nSubKsp, &subKsp);
 
+
     TEST_EXIT(nSubKsp == 2)
       ("Wrong numer of KSPs inside of the fieldsplit preconditioner!\n");
 
@@ -156,7 +160,7 @@ namespace AMDiS {
     switch (velocitySolutionMode) {
     case 0:      
       petsc_helper::setSolver(kspVelocity, "", 
-			      KSPRICHARDSON, PCHYPRE, 0.0, 1e-14, 1);
+ 			      KSPRICHARDSON, PCHYPRE, 0.0, 1e-14, 1);
       break;
     case 1:
       petsc_helper::setSolverWithLu(kspVelocity, "", KSPPREONLY, 
@@ -306,7 +310,7 @@ namespace AMDiS {
     switch (laplaceSolutionMode) {
     case 0:
       petsc_helper::setSolver(matShellContext.kspLaplace, "laplace_", 
-			      KSPRICHARDSON, PCHYPRE, 0.0, 1e-14, 1);
+ 			      KSPRICHARDSON, PCHYPRE, 0.0, 1e-14, 1);
       break;
     case 1:
       petsc_helper::setSolverWithLu(matShellContext.kspLaplace, "mass_", 
@@ -318,6 +322,9 @@ namespace AMDiS {
     }
 
     setConstantNullSpace(matShellContext.kspLaplace);
+
+    MSG("Setup of Navier-Stokes preconditioner needed %.5f seconds\n", 
+	MPI::Wtime() - wtime);
   }