diff --git a/AMDiS/src/parallel/MeshDistributor.cc b/AMDiS/src/parallel/MeshDistributor.cc
index 108be0e9bad2daa2b408995baf9ec623872be53b..08150a64063f2f2e68b47c5814f0e4760208cea2 100644
--- a/AMDiS/src/parallel/MeshDistributor.cc
+++ b/AMDiS/src/parallel/MeshDistributor.cc
@@ -1022,34 +1022,61 @@ namespace AMDiS {
     printImbalanceFactor();
   }
 
-  
-  void MeshDistributor::printImbalanceFactor()
+
+  void MeshDistributor::getImbalanceFactor(double &imbalance, 
+					   int &minDofs, 
+					   int &maxDofs,
+					   int &sumDofs)
   {
-    FUNCNAME("MeshDistributor::printImbalanceFactor()");
+    FUNCNAME("MeshDistributor::getImbalanceFactor()");
 
     vector<int> nDofsInRank(mpiSize);
     int nDofs = mesh->getDofAdmin(0).getUsedDofs();
     mpiComm.Gather(&nDofs, 1, MPI_INT, &(nDofsInRank[0]), 1, MPI_INT, 0);
 
     if (mpiRank == 0) {
-      int nOverallDofs = 0;
-      int maxDofs = numeric_limits<int>::min();
-      int minDofs = numeric_limits<int>::max();
+      sumDofs = 0;
+      minDofs = numeric_limits<int>::max();
+      maxDofs = numeric_limits<int>::min();
+
       for (int i = 0; i < mpiSize; i++) {
-	nOverallDofs += nDofsInRank[i];
-	maxDofs = std::max(maxDofs, nDofsInRank[i]);
+	sumDofs += nDofsInRank[i];	
 	minDofs = std::min(minDofs, nDofsInRank[i]);
+	maxDofs = std::max(maxDofs, nDofsInRank[i]);
       }      
-//       int avrgDofs = nOverallDofs / mpiSize;
-//       double imbalance0 = 
-// 	(static_cast<double>(maxDofs - avrgDofs) /  avrgDofs) * 100.0;
-      double imbalance1 = (static_cast<double>(maxDofs) / minDofs - 1.0) * 100.0;
-
-      MSG("Imbalancing factor: %.1f\n", imbalance1);
+      int avrgDofs = sumDofs / mpiSize;
+      imbalance = ((static_cast<double>(maxDofs) / avrgDofs) - 1.0);
     }
   }
 
   
+  double MeshDistributor::getImbalanceFactor()
+  {
+    double factor = 0.0;   // only rank 0 receives a gathered result
+    int a = 0;
+    int b = 0;
+    int c = 0;
+    getImbalanceFactor(factor, a, b, c);
+    return factor;
+  }
+  
+
+  void MeshDistributor::printImbalanceFactor()
+  {
+    FUNCNAME("MeshDistributor::printImbalanceFactor()");
+
+    double imbalanceFactor = 0.0;
+    int minDofs = 0;
+    int maxDofs = 0;
+    int sumDofs = 0;
+
+    getImbalanceFactor(imbalanceFactor, minDofs, maxDofs, sumDofs);
+    if (mpiRank == 0)
+      MSG("Imbalancing factor: %.2f  [ minDofs = %d, maxDofs = %d, sumDofs = %d ]\n", 
+	  imbalanceFactor * 100.0, minDofs, maxDofs, sumDofs);
+  }
+
+  
   bool MeshDistributor::checkAndAdaptBoundary(RankToBoundMap &allBound)
   {
     FUNCNAME("MeshDistributor::checkAndAdaptBoundary()");
@@ -1217,34 +1244,18 @@ namespace AMDiS {
   void MeshDistributor::repartitionMesh()
   {
     FUNCNAME("MeshDistributor::repartitionMesh()");
-    
-    // === First we check if the rank with the maximum number of DOFs has at  ===
-    // === least 20% more DOFs than the rank with the minimum number of DOFs. ===
-    // === In this case, the mesh will be repartition.                        ===
 
-    double inbalanceFactor = 1.2;
-    Parameters::get("parallel->repartitioning->inbalance", inbalanceFactor);
+    // === First, check whether the load is imbalanced across the ranks. ===
 
     int repartitioning = 0;
-    vector<int> nDofsInRank(mpiSize);
-    int nDofs = mesh->getDofAdmin(0).getUsedDofs();
-    mpiComm.Gather(&nDofs, 1, MPI_INT, &(nDofsInRank[0]), 1, MPI_INT, 0);
+    double imbalanceFactor = getImbalanceFactor();
 
     if (mpiRank == 0) {
-      int nOverallDofs = 0;
-      int minDofs = numeric_limits<int>::max();
-      int maxDofs = numeric_limits<int>::min();
-      for (int i = 0; i < mpiSize; i++) {
-	nOverallDofs += nDofsInRank[i];
-	minDofs = std::min(minDofs, nDofsInRank[i]);
-	maxDofs = std::max(maxDofs, nDofsInRank[i]);
-      }      
-     
-      MSG("Overall DOFs: %d    Min DOFs: %d    Max DOFs: %d\n", 
-	  nOverallDofs, minDofs, maxDofs);
+      double imbalanceRepartitionBound = 0.2;
+      Parameters::get("parallel->repartitioning->imbalance", 
+		      imbalanceRepartitionBound);
 
-      if (static_cast<double>(maxDofs) / static_cast<double>(minDofs) > 
-	 inbalanceFactor) 
+      if (imbalanceFactor > imbalanceRepartitionBound)
 	repartitioning = 1;
 
       mpiComm.Bcast(&repartitioning, 1, MPI_INT, 0);
@@ -1283,6 +1294,19 @@ namespace AMDiS {
       }
     }
 
+    double maxWeight = -1.0;
+    double sumWeight = 0.0;
+    for (map<int, double>::iterator it = elemWeights.begin();
+	 it != elemWeights.end(); ++it) {
+      maxWeight = std::max(maxWeight, it->second);
+      sumWeight += it->second;
+    }
+
+    mpi::globalMax(maxWeight);
+    mpi::globalAdd(sumWeight);
+
+    MSG("Partition weight: sum = %e max = %e\n", sumWeight, maxWeight);
+
     // === Run mesh partitioner to calculate a new mesh partitioning.  ===
 
     partitioner->setLocalGlobalDofMap(&(dofMap[feSpaces[0]].getMap()));
@@ -1514,27 +1538,7 @@ namespace AMDiS {
     
     check3dValidMesh();
 
-    MSG("Mesh repartitioning needed %.5f seconds\n", MPI::Wtime() - timePoint);
-
-    
-    // === Print DOF information to screen. ===
-
-    nDofs = mesh->getDofAdmin(0).getUsedDofs();
-    mpiComm.Gather(&nDofs, 1, MPI_INT, &(nDofsInRank[0]), 1, MPI_INT, 0);
-
-    if (mpiRank == 0) {
-      int nOverallDofs = 0;
-      int minDofs = numeric_limits<int>::max();
-      int maxDofs = numeric_limits<int>::min();
-      for (int i = 0; i < mpiSize; i++) {
-	nOverallDofs += nDofsInRank[i];
-	minDofs = std::min(minDofs, nDofsInRank[i]);
-	maxDofs = std::max(maxDofs, nDofsInRank[i]);
-      }      
-     
-      MSG("Overall DOFs: %d    Min DOFs: %d    Max DOFs: %d\n", 
-	  nOverallDofs, minDofs, maxDofs);
-    }
+    MSG("Mesh repartitioning needed %.5f seconds\n", MPI::Wtime() - timePoint);   
   }
 
 
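Note on the metric used above: getImbalanceFactor() now reports the imbalance relative to the average rank load (maxDofs / avrgDofs - 1) instead of the old maxDofs / minDofs - 1, and the gathered DOF counts exist on rank 0 only. A minimal serial sketch of the formula; the helper name is illustrative and not part of the patch:

  #include <algorithm>
  #include <numeric>
  #include <vector>

  // Sketch only: the metric of MeshDistributor::getImbalanceFactor(),
  // computed from an already gathered vector of per-rank DOF counts.
  double imbalanceFromDofCounts(const std::vector<int> &nDofsInRank)
  {
    int sumDofs = std::accumulate(nDofsInRank.begin(), nDofsInRank.end(), 0);
    int maxDofs = *std::max_element(nDofsInRank.begin(), nDofsInRank.end());
    int avrgDofs = sumDofs / static_cast<int>(nDofsInRank.size());
    return static_cast<double>(maxDofs) / avrgDofs - 1.0;
  }

  // Example: counts {900, 1000, 1100, 1200} give avrgDofs = 1050 and
  // maxDofs = 1200, so the imbalance factor is about 0.14 (14 %).
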
diff --git a/AMDiS/src/parallel/MeshDistributor.h b/AMDiS/src/parallel/MeshDistributor.h
index 953cbec6d5724bb9f748112fff89f3096fb44c24..4878f469f61db4153c0bd33ad687b5a0c806984b 100644
--- a/AMDiS/src/parallel/MeshDistributor.h
+++ b/AMDiS/src/parallel/MeshDistributor.h
@@ -97,22 +97,30 @@ namespace AMDiS {
      */
     void checkMeshChange(bool tryRepartition = true);
 
-    /** \brief
-     * Checks if is required to repartition the mesh. If this is the case, a new
-     * partition will be created and the mesh will be redistributed between the
-     * ranks.
-     */
+    /// Checks whether it is required to repartition the mesh. If this is the
+    /// case, a new partition is created and the mesh is redistributed among
+    /// the ranks.
     void repartitionMesh();
     
+    /// Calculates the imbalance factor: the number of DOFs on the rank with
+    /// the most DOFs, divided by the average number of DOFs per rank, minus
+    /// one. The DOF counts are gathered on rank 0, thus the results are
+    /// meaningful on rank 0 only.
+    void getImbalanceFactor(double &imbalance,
+			    int &minDofs,
+			    int &maxDofs,
+			    int &sumDofs);
+
+    /// Convenience overload that returns only the imbalance factor itself.
+    double getImbalanceFactor();
+
     /// Calculates the imbalancing factor and prints it to screen.
     void printImbalanceFactor();
 
-    /** \brief
-     * Test, if the mesh consists of macro elements only. The mesh partitioning 
-     * of the parallelization works for macro meshes only and would fail, if the 
-     * mesh is already refined in some way. Therefore, this function will exit
-     * the program if it finds a non macro element in the mesh.
-     */
+    /// Tests whether the mesh consists of macro elements only. The mesh
+    /// partitioning of the parallelization works for macro meshes only and
+    /// would fail if the mesh were already refined in some way. Therefore,
+    /// this function exits the program if it finds a non-macro element.
     void testForMacroMesh();
 
     /// Set for each element on the partitioning level the number of 
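Both new overloads are collective: they gather the per-rank DOF counts internally, so every rank must call them, while the outputs are only meaningful on rank 0. A usage sketch under that assumption, mirroring the decision logic of repartitionMesh() in this patch:

  // Sketch, assumed to run inside a MeshDistributor member function so that
  // mpiRank, mpiComm and Parameters are available exactly as in
  // MeshDistributor::repartitionMesh().
  int repartitioning = 0;
  double imbalanceFactor = getImbalanceFactor();   // collective call

  if (mpiRank == 0) {
    double bound = 0.2;
    Parameters::get("parallel->repartitioning->imbalance", bound);
    repartitioning = (imbalanceFactor > bound) ? 1 : 0;
  }
  mpiComm.Bcast(&repartitioning, 1, MPI_INT, 0);   // share rank 0's decision
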
diff --git a/AMDiS/src/parallel/ParMetisPartitioner.cc b/AMDiS/src/parallel/ParMetisPartitioner.cc
index 083d7937083f478a94c77d618876560a056e9f30..ce0967ff270c16df79e7803b0c86ffda0f14229b 100644
--- a/AMDiS/src/parallel/ParMetisPartitioner.cc
+++ b/AMDiS/src/parallel/ParMetisPartitioner.cc
@@ -290,17 +290,18 @@ namespace AMDiS {
 
     vector<double> tpwgts(mpiSize);
     double ubvec = 1.05;
-    int options[4] = {0, 0, 15, 1}; // default options
+    int options[4] = {0, 0, 15, PARMETIS_PSR_COUPLED}; // default options
     int edgecut = -1;
     vector<int> part(nElements);
 
     // set tpwgts
     for (int i = 0; i < mpiSize; i++)
-      tpwgts[i] = 1.0 / nparts;
+      tpwgts[i] = 1.0 / static_cast<double>(nparts);
    
     float scale = 10000.0 / maxWgt;
     for (int i = 0; i < nElements; i++)
-      wgts[i] = static_cast<int>(floatWgts[i] * scale);
+      wgts[i] = static_cast<int>(floatWgts[i]);
+      // Previously scaled: wgts[i] = static_cast<int>(floatWgts[i] * scale);
 
 
     // === Start ParMETIS. ===
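Dropping the scaling means the floating point element weights are now truncated to integer ParMETIS weights as-is (as the previous static_cast suggests): weights below 1.0 become 0 and drop out of the balance constraint. If scaled integer weights are reintroduced later, rounding with a floor of 1 avoids that. The helper below is hypothetical and not part of the patch:

  #include <algorithm>
  #include <cmath>

  // Hypothetical helper: map a floating point element weight to a strictly
  // positive integer weight, rounding instead of truncating.
  inline int toParMetisWeight(double w, double scale)
  {
    return std::max(1, static_cast<int>(std::floor(w * scale + 0.5)));
  }

  // Possible use in the loop above: wgts[i] = toParMetisWeight(floatWgts[i], scale);
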
diff --git a/AMDiS/src/parallel/PetscProblemStat.cc b/AMDiS/src/parallel/PetscProblemStat.cc
index 251bd0f73b4c3164f9f4a635db05d669ddec9e4e..73dd638f69ba9203784f5dc4bc17377d80c5a5e8 100644
--- a/AMDiS/src/parallel/PetscProblemStat.cc
+++ b/AMDiS/src/parallel/PetscProblemStat.cc
@@ -86,16 +86,6 @@ namespace AMDiS {
 
     double wtime = MPI::Wtime();
 
-#if 0
-    double vm, rss;
-    processMemUsage(vm, rss);       
-    MSG("STAGE 1\n");
-    MSG("My memory usage is VM = %.1f MB    RSS = %.1f MB\n", vm, rss);    
-    mpi::globalAdd(vm);
-    mpi::globalAdd(rss);
-    MSG("Overall memory usage is VM = %.1f MB    RSS = %.1f MB\n", vm, rss);
-#endif
-
     if (createMatrixData) {
       petscSolver->setMeshDistributor(meshDistributor, 
 				      meshDistributor->getMpiComm(),
@@ -106,33 +96,19 @@ namespace AMDiS {
 
     petscSolver->fillPetscRhs(rhs);
 
-#if 0
-    processMemUsage(vm, rss);   
-    MSG("STAGE 2\n");
-    MSG("My memory usage is VM = %.1f MB    RSS = %.1f MB\n", vm, rss);    
-    mpi::globalAdd(vm);
-    mpi::globalAdd(rss);
-    MSG("Overall memory usage is VM = %.1f MB    RSS = %.1f MB\n", vm, rss);
-#endif
+    INFO(info, 8)("creation of parallel data structures needed %.5f seconds\n", 
+		  MPI::Wtime() - wtime);
+    wtime = MPI::Wtime();
 
     petscSolver->solvePetscMatrix(*solution, adaptInfo);   
 
+    INFO(info, 8)("solution of discrete system needed %.5f seconds\n", 
+		  MPI::Wtime() - wtime);
+
     petscSolver->destroyVectorData();
 
     if (!storeMatrixData)
       petscSolver->destroyMatrixData();
-
-#if 0
-    processMemUsage(vm, rss);   
-    MSG("STAGE 3\n");
-    MSG("My memory usage is VM = %.1f MB    RSS = %.1f MB\n", vm, rss);    
-    mpi::globalAdd(vm);
-    mpi::globalAdd(rss);
-    MSG("Overall memory usage is VM = %.1f MB    RSS = %.1f MB\n", vm, rss);
-#endif
-
-    INFO(info, 8)("solution of discrete system needed %.5f seconds\n", 
-		  MPI::Wtime() - wtime);
   }
 
 }
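The three removed memory reports were identical copy-pasted "#if 0" blocks, so dropping them is a clear cleanup. If that instrumentation is ever needed again, it fits into one small helper; the sketch assumes processMemUsage(double&, double&) and mpi::globalAdd(double&) behave as in the removed lines:

  // Sketch only, not part of the patch: one call site instead of three
  // copied blocks.
  static void printMemUsage(const char *stage)
  {
    FUNCNAME("printMemUsage()");

    double vm = 0.0, rss = 0.0;
    processMemUsage(vm, rss);
    MSG("%s: local memory usage VM = %.1f MB  RSS = %.1f MB\n", stage, vm, rss);

    mpi::globalAdd(vm);
    mpi::globalAdd(rss);
    MSG("%s: overall memory usage VM = %.1f MB  RSS = %.1f MB\n", stage, vm, rss);
  }
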
diff --git a/AMDiS/src/parallel/PetscSolverGlobalMatrix.cc b/AMDiS/src/parallel/PetscSolverGlobalMatrix.cc
index 578fd3b21e08dbc58d92f905ff2d51b0a3866244..48d55f1ccb89cf236d18c0aff2e1a67fa9a92d46 100644
--- a/AMDiS/src/parallel/PetscSolverGlobalMatrix.cc
+++ b/AMDiS/src/parallel/PetscSolverGlobalMatrix.cc
@@ -126,7 +126,9 @@ namespace AMDiS {
     if (!zeroStartVector)
       KSPSetInitialGuessNonzero(kspInterior, PETSC_TRUE);
 
-    MSG("Fill petsc matrix needed %.5f seconds\n", MPI::Wtime() - wtime);
+#if (DEBUG != 0)
+    MSG("Fill petsc matrix 3 needed %.5f seconds\n", MPI::Wtime() - wtime);
+#endif
   }
 
 
@@ -640,17 +642,18 @@ namespace AMDiS {
     // Get periodic mapping object
     PeriodicMap &perMap = meshDistributor->getPeriodicMap();
 
-    // === Traverse all rows of the dof matrix and insert row wise the values ===
+    const FiniteElemSpace *rowFe = mat->getRowFeSpace();
+    const FiniteElemSpace *colFe = mat->getColFeSpace();
+    DofMap& rowMap = (*interiorMap)[rowFe].getMap();
+    DofMap& colMap = (*interiorMap)[colFe].getMap();
+
+    // === Traverse all rows of the DOF matrix and insert row wise the values ===
     // === to the PETSc matrix.                                               ===
 
     for (cursor_type cursor = begin<row>(mat->getBaseMatrix()), 
 	   cend = end<row>(mat->getBaseMatrix()); cursor != cend; ++cursor) {
-
-      const FiniteElemSpace *rowFe = mat->getRowFeSpace();
-      const FiniteElemSpace *colFe = mat->getColFeSpace();
-
       // Global index of the current row DOF.
-      int globalRowDof = (*interiorMap)[rowFe][*cursor].global;
+      int globalRowDof = rowMap[*cursor].global;
 
       // Test if the current row DOF is a periodic DOF.
       bool periodicRow = perMap.isPeriodic(rowFe, globalRowDof);
@@ -668,7 +671,7 @@ namespace AMDiS {
 	     icursor != icend; ++icursor) {
 
 	  // Global index of the current column index.
-	  int globalColDof = (*interiorMap)[colFe][col(*icursor)].global;
+	  int globalColDof = colMap[col(*icursor)].global;
 	  // Test if the current col dof is a periodic dof.
 	  bool periodicCol = perMap.isPeriodic(colFe, globalColDof);
 	  // Get PETSc's mat col index.
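The two hunks above hoist the FE space and DOF map lookups out of the row traversal, so (*interiorMap)[rowFe] and (*interiorMap)[colFe] are resolved once per matrix instead of once per row or column entry. A minimal stand-alone sketch of that pattern; the container types are stand-ins, not the AMDiS classes:

  #include <map>
  #include <string>
  #include <vector>

  typedef std::map<int, int> DofToGlobal;            // stand-in for DofMap

  // Resolve the map for the row FE space once, outside the loop, instead of
  // repeating the lookup for every traversed DOF.
  void collectGlobalRows(std::map<std::string, DofToGlobal> &interiorMap,
                         const std::string &rowFeSpace,
                         const std::vector<int> &localDofs,
                         std::vector<int> &globalRows)
  {
    DofToGlobal &rowMap = interiorMap[rowFeSpace];   // hoisted lookup
    for (size_t i = 0; i < localDofs.size(); i++)
      globalRows.push_back(rowMap[localDofs[i]]);    // per-entry access only
  }
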
@@ -727,8 +730,8 @@ namespace AMDiS {
 	  }
 	}
 
-	MatSetValues(matIntInt, 1, &rowIndex, cols.size(), 
-		     &(cols[0]), &(values[0]), ADD_VALUES);	
+  	MatSetValues(matIntInt, 1, &rowIndex, cols.size(), 
+  		     &(cols[0]), &(values[0]), ADD_VALUES);	
       } else {
 	// === Row DOF index is periodic. ===