diff --git a/AMDiS/src/parallel/InteriorBoundary.cc b/AMDiS/src/parallel/InteriorBoundary.cc
index 539a15195de08d286549945400aad9d5cb7ec142..4c8e92469f7f88d62f5e3e3ffba86642d6f47ef2 100644
--- a/AMDiS/src/parallel/InteriorBoundary.cc
+++ b/AMDiS/src/parallel/InteriorBoundary.cc
@@ -228,7 +228,14 @@ namespace AMDiS {
 		      bound.rankObj.elIndex)("Should not happen!\n");
 
 	bound.rankObj.el = elIndexMap[bound.rankObj.elIndex];
-	bound.neighObj.el = NULL;
+
+	// For the case of periodic interior boundaries, a rank may have an boundary
+	// with itself. In this case, also the pointer to the neighbour object must
+	// be set correctly.
+	if (elIndexMap.count(bound.neighObj.elIndex))
+	  bound.neighObj.el = elIndexMap[bound.neighObj.elIndex];
+	else
+	  bound.neighObj.el = NULL;
       }
     }
   }
diff --git a/AMDiS/src/parallel/MeshDistributor.cc b/AMDiS/src/parallel/MeshDistributor.cc
index 425d54d4d5ef43dae11fd426d4f1fe4ef0758667..5a477d3d1e4344f7e3fcc86f947f9581939340a8 100644
--- a/AMDiS/src/parallel/MeshDistributor.cc
+++ b/AMDiS/src/parallel/MeshDistributor.cc
@@ -1644,6 +1644,10 @@ namespace AMDiS {
 
 	for (unsigned int i = 0; i < it->second.size(); i++) {
 	  AtomicBoundary &bound = it->second[i];
+
+	  TEST_EXIT_DBG(bound.rankObj.el)("No rank object!\n");
+	  TEST_EXIT_DBG(bound.neighObj.el)("No neigh object!\n");
+
 	  DofContainer dofs0, dofs1;
 	  bound.rankObj.el->getVertexDofs(feSpace, bound.rankObj, dofs0);
 	  bound.rankObj.el->getNonVertexDofs(feSpace, bound.rankObj, dofs0); 
diff --git a/AMDiS/src/parallel/MeshDistributor.h b/AMDiS/src/parallel/MeshDistributor.h
index d6926e26e0c499b6f8add4541ef6be15db343a1f..6b8c3559611d26c9f00772da2b9fca0f5d5a7616 100644
--- a/AMDiS/src/parallel/MeshDistributor.h
+++ b/AMDiS/src/parallel/MeshDistributor.h
@@ -201,7 +201,10 @@ namespace AMDiS {
     /// rank's partition, but is owned by some other rank.
     inline bool getIsRankDof(DegreeOfFreedom dof)
     {
-      return isRankDof[dof];
+      if (isRankDof.count(dof))
+	return isRankDof[dof];
+
+      return false;
     }
 
     inline long getLastMeshChangeIndex()
@@ -526,7 +529,7 @@ namespace AMDiS {
      * If periodic boundaries are used, this map stores to each periodic DOF in rank's
      * partition the set of periodic boundaries the DOF is associated to. In 2D, most
      * DOFs are only on one periodic boundary. Only, e.g., in a box with all boundaries
-     * being periodic, the for corners are associated by two different boundaries.     
+     * being periodic, the four corners are associated by two different boundaries.
      */
     map<int, std::set<BoundaryType> > periodicDofAssociations;
 
diff --git a/AMDiS/src/parallel/ParallelDebug.cc b/AMDiS/src/parallel/ParallelDebug.cc
index b312884a8f52da7bc66dde01a2daba50cbe1b0cd..0d27d777e14f4e0d400c2b6fa1fdd08dc78b2c9d 100644
--- a/AMDiS/src/parallel/ParallelDebug.cc
+++ b/AMDiS/src/parallel/ParallelDebug.cc
@@ -143,32 +143,6 @@ namespace AMDiS {
   {
     FUNCNAME("ParallelDebug::testPeriodicBoundary()");
 
-    DegreeOfFreedom testDof = pdb.mapGlobalToLocal(968);
-    if (testDof != -1) {
-      WorldVector<double> c;
-      pdb.mesh->getDofIndexCoords(testDof, pdb.feSpace, c);
-
-      MSG("FOUND: LOCAL %d   GLOBAL %d  COORDS %f %f %f\n",
-	  testDof, 968, c[0], c[1], c[2]);
-    }
-
-    testDof = pdb.mapGlobalToLocal(973);
-    if (testDof != -1) {
-      WorldVector<double> c;
-      pdb.mesh->getDofIndexCoords(testDof, pdb.feSpace, c);
-
-      MSG("FOUND: LOCAL %d   GLOBAL %d  COORDS %f %f %f\n",
-	  testDof, 973, c[0], c[1], c[2]);
-    }
-
-    testDof = pdb.mapGlobalToLocal(1795);
-    if (testDof != -1) {
-      WorldVector<double> c;
-      pdb.mesh->getDofIndexCoords(testDof, pdb.feSpace, c);
-
-      MSG("FOUND: LOCAL %d   GLOBAL %d  COORDS %f %f %f\n",
-	  testDof, 1795, c[0], c[1], c[2]);
-    }
     // === 1. check: All periodic DOFs should have at least a correct number ===
     // === of periodic associations.                                         ===
 
diff --git a/AMDiS/src/parallel/PetscSolver.cc b/AMDiS/src/parallel/PetscSolver.cc
index f83cb4da9a6472b17916b591f4d9fc9e94027fff..9dd3556c03b81b7f05c51b0b3bd0302d1e02039c 100644
--- a/AMDiS/src/parallel/PetscSolver.cc
+++ b/AMDiS/src/parallel/PetscSolver.cc
@@ -100,6 +100,9 @@ namespace AMDiS {
 	  // Test if the current col dof is a periodic dof.
 	  bool periodicCol = meshDistributor->isPeriodicDof(globalColDof);
 
+	  if (value(*icursor) == 0.0 && globalRowDof != globalColDof)
+	    continue;
+
 	  if (!periodicCol) {
 	    // Calculate the exact position of the column index in the petsc matrix.
 	    cols.push_back(globalColDof * dispMult + dispAddCol);
@@ -130,7 +133,7 @@ namespace AMDiS {
 	      }
 	    }
 
-	    for (int i = 0; i < newCols.size(); i++) {
+	    for (unsigned int i = 0; i < newCols.size(); i++) {
 	      cols.push_back(newCols[i] * dispMult + dispAddCol);
 	      values.push_back(scaledValue);	      
 	    }
@@ -140,12 +143,18 @@ namespace AMDiS {
 	MatSetValues(petscMatrix, 1, &rowIndex, cols.size(), 
 		     &(cols[0]), &(values[0]), ADD_VALUES);	
       } else {
+	std::map<int, std::vector<int> > colsMap;
+	std::map<int, std::vector<double> > valsMap;
+
 	for (icursor_type icursor = begin<nz>(cursor), icend = end<nz>(cursor); 
-	     icursor != icend; ++icursor) {	  
+	     icursor != icend; ++icursor) {
 
 	  // Global index of the current column index.
 	  int globalColDof = meshDistributor->mapLocalToGlobal(col(*icursor));
 
+	  if (value(*icursor) == 0.0 && globalRowDof != globalColDof)
+	    continue;
+
 	  std::set<int> perAsc;
 
 	  if (meshDistributor->isPeriodicDof(globalColDof)) {
@@ -157,14 +166,12 @@ namespace AMDiS {
 		perAsc.insert(*it);
 	  }
 
-	  if (meshDistributor->isPeriodicDof(globalRowDof)) {
-	    std::set<int>& perRowAsc = 
-	      meshDistributor->getPerDofAssociations(globalRowDof);
-	    for (std::set<int>::iterator it = perRowAsc.begin(); 
-		 it != perRowAsc.end(); ++it)
-	      if (*it >= -3)
-		perAsc.insert(*it);
-	  }
+	  std::set<int>& perRowAsc = 
+	    meshDistributor->getPerDofAssociations(globalRowDof);
+	  for (std::set<int>::iterator it = perRowAsc.begin(); 
+	       it != perRowAsc.end(); ++it)
+	    if (*it >= -3)
+	      perAsc.insert(*it);
 
 	  double scaledValue = 
 	    value(*icursor) * std::pow(0.5, static_cast<double>(perAsc.size()));
@@ -195,10 +202,23 @@ namespace AMDiS {
 	       eIt != entry.end(); ++eIt) {
 	    // Calculate petsc row index.
 	    int rowIndex = eIt->first * dispMult + dispAddRow;
+
 	    int colIndex = eIt->second * dispMult + dispAddCol;
-	    MatSetValue(petscMatrix, rowIndex, colIndex, scaledValue, ADD_VALUES);
+	    colsMap[rowIndex].push_back(colIndex);
+	    valsMap[rowIndex].push_back(scaledValue);
 	  }
 	}
+
+
+	for (std::map<int, std::vector<int> >::iterator rowIt = colsMap.begin();
+	     rowIt != colsMap.end(); ++rowIt) {
+	  TEST_EXIT_DBG(rowIt->second.size() == valsMap[rowIt->first].size())
+	    ("Should not happen!\n");
+
+	  int rowIndex = rowIt->first;
+	  MatSetValues(petscMatrix, 1, &rowIndex, rowIt->second.size(),
+		       &(rowIt->second[0]), &(valsMap[rowIt->first][0]), ADD_VALUES);
+	}
       }
     }
   }
@@ -256,111 +276,123 @@ namespace AMDiS {
     namespace traits = mtl::traits;
     typedef DOFMatrix::base_matrix_type Matrix;
     typedef std::vector<std::pair<int, int> > MatrixNnzEntry;
+    typedef std::map<int, DofContainer> RankToDofContainer;
 
     // Stores to each rank a list of nnz entries (i.e. pairs of row and column index)
-    // that this rank will send to. This nnz entries will be assembled on this rank,
+    // that this rank will send to. These nnz entries will be assembled on this rank,
     // but because the row DOFs are not DOFs of this rank they will be send to the
     // owner of the row DOFs.
     std::map<int, MatrixNnzEntry> sendMatrixEntry;
 
+    // Maps to each DOF that must be send to another rank the rank number of the
+    // receiving rank.
+    std::map<DegreeOfFreedom, int> sendDofToRank;
+
 
     // First, create for all ranks we send data to MatrixNnzEntry object with 0 entries.
-    typedef std::map<int, DofContainer> RankToDofContainer;
-    RankToDofContainer& recvDofs = meshDistributor->getRecvDofs();
-    for (RankToDofContainer::iterator it = recvDofs.begin();
-	 it != recvDofs.end(); ++it)
+    for (RankToDofContainer::iterator it = meshDistributor->getRecvDofs().begin();
+	 it != meshDistributor->getRecvDofs().end(); ++it) {
       sendMatrixEntry[it->first].resize(0);
 
+      for (DofContainer::iterator dofIt = it->second.begin();
+	   dofIt != it->second.end(); ++dofIt)
+	sendDofToRank[**dofIt] = it->first;
+    }
+
+
+    std::set<int> recvFromRank;
+    for (RankToDofContainer::iterator it = meshDistributor->getSendDofs().begin();
+	 it != meshDistributor->getSendDofs().end(); ++it)
+      recvFromRank.insert(it->first);
+
 
     for (int i = 0; i < nComponents; i++) {
       for (int j = 0; j < nComponents; j++) {
- 	if ((*mat)[i][j]) {
-	  Matrix bmat = (*mat)[i][j]->getBaseMatrix();
+ 	if (!(*mat)[i][j])
+	  continue;
+
+	Matrix bmat = (*mat)[i][j]->getBaseMatrix();
 
-	  traits::col<Matrix>::type col(bmat);
-	  traits::const_value<Matrix>::type value(bmat);
+	traits::col<Matrix>::type col(bmat);
+	traits::const_value<Matrix>::type value(bmat);
 	  
-	  typedef traits::range_generator<row, Matrix>::type cursor_type;
-	  typedef traits::range_generator<nz, cursor_type>::type icursor_type;
+	typedef traits::range_generator<row, Matrix>::type cursor_type;
+	typedef traits::range_generator<nz, cursor_type>::type icursor_type;
+	
+	for (cursor_type cursor = begin<row>(bmat), 
+	       cend = end<row>(bmat); cursor != cend; ++cursor) {
 	  
-	  for (cursor_type cursor = begin<row>(bmat), 
-		 cend = end<row>(bmat); cursor != cend; ++cursor) {
-
-	    // Map the local row number to the global DOF index and create from it
-	    // the global PETSc row index of this DOF.
-	    int petscRowIdx = 
-	      meshDistributor->mapLocalToGlobal(*cursor) * nComponents + i;
-
-	    if (meshDistributor->getIsRankDof(*cursor)) {
+	  int globalRowDof = meshDistributor->mapLocalToGlobal(*cursor);
 
-	      // === The current row DOF is a rank dof, so create the corresponding ===
-	      // === nnz values directly on rank's nnz data.                        ===
-
-	      // This is the local row index of the local PETSc matrix.
-	      int localPetscRowIdx = 
-		petscRowIdx - meshDistributor->getRstart() * nComponents;
+	  std::vector<int> rows;
+	  rows.push_back(globalRowDof);
+	  std::vector<int> rowRank;
+	  if (meshDistributor->getIsRankDof(*cursor)) {
+	    rowRank.push_back(meshDistributor->getMpiRank());
+	  } else {
+	    // Find out who is the member of this DOF.
+	    TEST_EXIT_DBG(sendDofToRank.count(*cursor))("Should not happen!\n");
+	    
+	    rowRank.push_back(sendDofToRank[*cursor]);
+	  }
 
-	      TEST_EXIT_DBG(localPetscRowIdx >= 0 && localPetscRowIdx < nRankRows)
-		("Should not happen! \n Debug info: localRowIdx = %d   globalRowIndx = %d   petscRowIdx = %d   localPetscRowIdx = %d   rStart = %d   nCompontens = %d   nRankRows = %d\n",
-		 *cursor, meshDistributor->mapLocalToGlobal(*cursor), petscRowIdx, localPetscRowIdx, meshDistributor->getRstart(), nComponents, nRankRows);
+	  // Map the local row number to the global DOF index and create from it
+	  // the global PETSc row index of this DOF.
+	  
+	  int petscRowIdx = globalRowDof * nComponents + i;
+	  
+	  if (meshDistributor->getIsRankDof(*cursor)) {
+	    	    
+	    // === The current row DOF is a rank dof, so create the corresponding ===
+	    // === nnz values directly on rank's nnz data.                        ===
+	    
+	    // This is the local row index of the local PETSc matrix.
+	    int localPetscRowIdx = 
+	      petscRowIdx - meshDistributor->getRstart() * nComponents;
+	    
+	    TEST_EXIT_DBG(localPetscRowIdx >= 0 && localPetscRowIdx < nRankRows)
+	      ("Should not happen! \n Debug info: localRowIdx = %d   globalRowIndx = %d   petscRowIdx = %d   localPetscRowIdx = %d   rStart = %d   nCompontens = %d   nRankRows = %d\n",
+	       *cursor, meshDistributor->mapLocalToGlobal(*cursor), petscRowIdx, localPetscRowIdx, meshDistributor->getRstart(), nComponents, nRankRows);
+	    
+	    
+	    // Traverse all non zero entries in this row.
+	    for (icursor_type icursor = begin<nz>(cursor), 
+		   icend = end<nz>(cursor); icursor != icend; ++icursor) {
+	      int petscColIdx = 
+		meshDistributor->mapLocalToGlobal(col(*icursor)) * nComponents + j;
 	      
-	      // Traverse all non zero entries in this row.
-	      for (icursor_type icursor = begin<nz>(cursor), 
-		     icend = end<nz>(cursor); icursor != icend; ++icursor) {
+	      if (value(*icursor) != 0.0 || petscRowIdx == petscColIdx) {
+		// The row DOF is a rank DOF, if also the column is a rank DOF, 
+		// increment the d_nnz values for this row, otherwise the o_nnz value.
+		if (petscColIdx >= meshDistributor->getRstart() * nComponents && 
+		    petscColIdx < meshDistributor->getRstart() * nComponents + nRankRows)
+		  d_nnz[localPetscRowIdx]++;
+		else
+		  o_nnz[localPetscRowIdx]++;
+	      }    
+	    }
+	  } else {
+	    // === The current row DOF is not a rank dof, i.e., it will be created ===
+	    // === on this rank, but after this it will be send to another rank    ===
+	    // === matrix. So we need to send also the corresponding nnz structure ===
+	    // === of this row to the corresponding rank.                          ===
+	    
+	    // Send all non zero entries to the member of the row DOF.
+	    int sendToRank = sendDofToRank[*cursor];
+	    
+	    for (icursor_type icursor = begin<nz>(cursor), 
+		   icend = end<nz>(cursor); icursor != icend; ++icursor) {
+	      if (value(*icursor) != 0.0) {
 		int petscColIdx = 
 		  meshDistributor->mapLocalToGlobal(col(*icursor)) * nComponents + j;
-
-		if (value(*icursor) != 0.0 || petscRowIdx == petscColIdx) {
-		  // The row DOF is a rank DOF, if also the column is a rank DOF, 
-		  // increment the d_nnz values for this row, otherwise the o_nnz value.
-		  if (petscColIdx >= meshDistributor->getRstart() * nComponents && 
-		      petscColIdx < meshDistributor->getRstart() * nComponents + nRankRows)
-		    d_nnz[localPetscRowIdx]++;
-		  else
-		    o_nnz[localPetscRowIdx]++;
-		}    
-	      }
-	    } else {
-	      typedef std::map<int, DofContainer> RankToDofContainer;
-
-	      // === The current row DOF is not a rank dof, i.e., it will be created ===
-	      // === on this rank, but after this it will be send to another rank    ===
-	      // === matrix. So we need to send also the corresponding nnz structure ===
-	      // === of this row to the corresponding rank.                          ===
-
-	      // Find out who is the member of this DOF.
-	      int sendToRank = -1;
-	      for (RankToDofContainer::iterator it = meshDistributor->getRecvDofs().begin();
-		   it != meshDistributor->getRecvDofs().end(); ++it) {
-		for (DofContainer::iterator dofIt = it->second.begin();
-		     dofIt != it->second.end(); ++dofIt) {
-		  if (**dofIt == *cursor) {
-		    sendToRank = it->first;
-		    break;
-		  }
-		}
-
-		if (sendToRank != -1)
-		  break;
-	      }
-
-	      TEST_EXIT_DBG(sendToRank != -1)("Should not happen!\n");
-
-	      // Send all non zero entries to the member of the row DOF.
-	      for (icursor_type icursor = begin<nz>(cursor), 
-		     icend = end<nz>(cursor); icursor != icend; ++icursor) {
-		if (value(*icursor) != 0.0) {
-		  int petscColIdx = 
-		    meshDistributor->mapLocalToGlobal(col(*icursor)) * nComponents + j;
-		  
-		  sendMatrixEntry[sendToRank].
-		    push_back(std::make_pair(petscRowIdx, petscColIdx));
-		}
+		
+		sendMatrixEntry[sendToRank].
+		  push_back(std::make_pair(petscRowIdx, petscColIdx));
 	      }
-
-	    } // if (isRankDof[*cursor]) ... else ...
-	  } // for each row in mat[i][j]
-	} // if mat[i][j]
+	    }
+	    
+	  } // if (isRankDof[*cursor]) ... else ...
+	} // for each row in mat[i][j]
       } 
     }
 
@@ -368,7 +400,9 @@ namespace AMDiS {
 
     StdMpi<MatrixNnzEntry> stdMpi(meshDistributor->getMpiComm(), true);
     stdMpi.send(sendMatrixEntry);
-    stdMpi.recv(meshDistributor->getSendDofs());
+    for (std::set<int>::iterator it = recvFromRank.begin(); 
+	 it != recvFromRank.end(); ++it)
+      stdMpi.recv(*it);
     stdMpi.startCommunication<int>(MPI_INT);
 
 
@@ -472,8 +506,8 @@ namespace AMDiS {
     for (int i = 0; i < nComponents; i++)
       for (int j = 0; j < nComponents; j++)
 	if ((*mat)[i][j])
-	  setDofMatrix((*mat)[i][j], nComponents, i, j);	
-	
+	  setDofMatrix((*mat)[i][j], nComponents, i, j);
+
 #if (DEBUG != 0)
     INFO(info, 8)("Fill petsc matrix 2 needed %.5f seconds\n", MPI::Wtime() - wtime);
 #endif