diff --git a/AMDiS/src/est/ResidualEstimator.cc b/AMDiS/src/est/ResidualEstimator.cc
index 87af4f670e310066dca39cc70e102b7efd3322f8..c96da9e627047320ed0835653bb8ced8de5eccfc 100644
--- a/AMDiS/src/est/ResidualEstimator.cc
+++ b/AMDiS/src/est/ResidualEstimator.cc
@@ -20,6 +20,7 @@
 
 #ifdef HAVE_PARALLEL_DOMAIN_AMDIS
 #include <mpi.h>
+#include "parallel/MeshDistributor.h"
 #endif
 
 namespace AMDiS {
@@ -152,7 +153,128 @@ namespace AMDiS {
 	  secondOrderTerms[system] = secondOrderTerms[system] || (*it)->secondOrderTerms();
       }
     }
+
+#ifdef HAVE_PARALLEL_DOMAIN_AMDIS
+    initParallel();
+#endif
+  }
+
+
+#ifdef HAVE_PARALLEL_DOMAIN_AMDIS
+  void ResidualEstimator::initParallel()
+  {
+    FUNCNAME("ResidualEstimator::initParallel()");
+
+    if (C1 == 0.0)
+      return;
+
+    DOFVector<WorldVector<double> > coords(uh[0]->getFeSpace(), "tmp");
+    mesh->getDofIndexCoords(uh[0]->getFeSpace(), coords);
+
+    InteriorBoundary &intBoundary = 
+      MeshDistributor::globalMeshDistributor->getIntBoundary();
+    RankToBoundMap& otherBound = intBoundary.getOther();
+
+    ElInfo *elInfo = mesh->createNewElInfo();
+    elInfo->setFillFlag(Mesh::FILL_COORDS);
+
+    StdMpi<vector<double> > stdMpiDet(MeshDistributor::globalMeshDistributor->getMpiComm());
+    StdMpi<vector<vector<WorldVector<double> > > > stdMpiGrdUh(MeshDistributor::globalMeshDistributor->getMpiComm());
+
+    for (RankToBoundMap::iterator it = otherBound.begin();
+	 it != otherBound.end(); ++it) {
+      for (unsigned int i = 0; i < it->second.size(); i++) {
+	BoundaryObject &bObj = it->second[i].rankObj;
+	if (bObj.subObj == VERTEX)
+	  continue;
+
+	vector<BoundaryObject> subBound;
+	bObj.el->getSubBoundary(bObj, subBound);
+
+	WorldVector<int> faceIndices;
+
+	for (unsigned int j = 0; j < subBound.size(); j++) {
+	  Element *el = subBound[j].el;	  
+	  int oppV = subBound[j].ithObj;
+
+	  elInfo->setElement(el);
+	  el->sortFaceIndices(oppV, faceIndices);
+
+	  elInfo->getCoord(oppV) = coords[el->getDof(oppV, 0)];
+	  for (int k = 0; k < dim; k++)
+	    elInfo->getCoord(faceIndices[k]) = coords[el->getDof(k, 0)];
+
+	  double detNeigh = abs(elInfo->calcGrdLambda(*lambdaNeigh));
+	  stdMpiDet.getSendData(it->first).push_back(detNeigh);
+
+	  for (int system = 0; system < nSystems; system++) {
+	    if (matrix[system] == NULL || secondOrderTerms[system] == false) 
+	      continue;
+
+	    uh[system]->getLocalVector(el, uhNeigh[system]);
+
+	    for (int iq = 0; iq < nPointsSurface; iq++) {
+
+	      (*lambda)[oppV] = 0.0;
+	      for (int k = 0; k < dim; k++)
+		(*lambda)[faceIndices[k]] = surfaceQuad->getLambda(iq, k);
+
+	      basFcts[system]->evalGrdUh(*lambda, *lambdaNeigh, uhNeigh[system], grdUhNeigh[iq]);
+	    }
+
+	    stdMpiGrdUh.getSendData(it->first).push_back(grdUhNeigh);
+	  }
+	}
+      }
+    }
+
+    stdMpiDet.updateSendDataSize();
+    stdMpiGrdUh.updateSendDataSize();
+
+    RankToBoundMap& ownBound = intBoundary.getOwn();
+
+    for (RankToBoundMap::iterator it = ownBound.begin();
+	 it != ownBound.end(); ++it) {
+
+      for (unsigned int i = 0; i < it->second.size(); i++) {
+	BoundaryObject &bObj = it->second[i].rankObj;
+	if (bObj.subObj != VERTEX) {
+	  stdMpiDet.recv(it->first);
+	  stdMpiGrdUh.recv(it->first);
+	  break;
+	}
+      }
+    }
+
+    stdMpiDet.startCommunication();
+    stdMpiGrdUh.startCommunication();
+
+
+    for (RankToBoundMap::iterator it = ownBound.begin();
+	 it != ownBound.end(); ++it) {
+      vector<BoundaryObject> subBound;
+	
+      for (unsigned int i = 0; i < it->second.size(); i++) {
+	BoundaryObject &bObj = it->second[i].rankObj;
+	if (bObj.subObj == VERTEX)
+	  continue;
+
+	bObj.el->getSubBoundary(bObj, subBound);
+      }
+      
+      if (subBound.size() == 0)
+	continue;
+
+      TEST_EXIT_DBG(subBound.size() == stdMpiDet.getRecvData(it->first).size())
+	("Should not happen: %d %d from rank %d\n", subBound.size(), stdMpiDet.getRecvData(it->first).size(), it->first);
+      
+      TEST_EXIT_DBG(subBound.size() == stdMpiGrdUh.getRecvData(it->first).size())
+	("Should not happen!\n");      
+    }    
+
+    MSG("INIT PARALLEL FINISCHED!\n");
   }
+#endif
 
 
   void ResidualEstimator::exit(bool output)
@@ -356,7 +478,6 @@ namespace AMDiS {
     FUNCNAME("ResidualEstimator::computeJumpResidual()");
 
     double result = 0.0;
-    int dow = Global::getGeo(WORLD);
     Element *el = elInfo->getElement();
     const DimVec<WorldVector<double> > &grdLambda = elInfo->getGrdLambda();
     double det = elInfo->getDet();
@@ -373,10 +494,8 @@ namespace AMDiS {
       el->sortFaceIndices(face, faceIndEl);
       neigh->sortFaceIndices(oppV, faceIndNeigh);	
       neighInfo->setElement(const_cast<Element*>(neigh));
-      neighInfo->setFillFlag(Mesh::FILL_COORDS);
-      
-      for (int i = 0; i < dow; i++)
-	neighInfo->getCoord(oppV)[i] = elInfo->getOppCoord(face)[i];
+      neighInfo->setFillFlag(Mesh::FILL_COORDS);     
+      neighInfo->getCoord(oppV) = elInfo->getOppCoord(face);
       
       // periodic leaf data ?
       ElementData *ldp = el->getElementData()->getElementData(PERIODIC);	
@@ -411,8 +530,7 @@ namespace AMDiS {
 	for (int i = 0; i < dim; i++) {
 	  int i1 = faceIndEl[i];
 	  int i2 = faceIndNeigh[i];
-	  for (int j = 0; j < dow; j++)
-	    neighInfo->getCoord(i2)[j] = elInfo->getCoord(i1)[j];
+	  neighInfo->getCoord(i2) = elInfo->getCoord(i1);
 	}
       }
 	
@@ -441,7 +559,7 @@ namespace AMDiS {
 	  
 	  (*lambda)[oppV] = 0.0;
 	  for (int i = 0; i < dim; i++)
-	    (*lambda)[faceIndNeigh[i]] = surfaceQuad->getLambda(iq, i);		  
+	    (*lambda)[faceIndNeigh[i]] = surfaceQuad->getLambda(iq, i);
 	  
 	  basFcts[system]->evalGrdUh(*lambda, *lambdaNeigh, uhNeigh[system], grdUhNeigh[iq]);
 	  
diff --git a/AMDiS/src/est/ResidualEstimator.h b/AMDiS/src/est/ResidualEstimator.h
index 862f214ce6064aac6200213190ea4ba6db12ecbb..ec46b71a4b8e7e9ed9aef4a569b87ba95d70bf9e 100644
--- a/AMDiS/src/est/ResidualEstimator.h
+++ b/AMDiS/src/est/ResidualEstimator.h
@@ -85,13 +85,17 @@ namespace AMDiS {
     /// Constructor.
     ResidualEstimator(std::string name, int r);
 
-    virtual void init(double timestep);
+    void init(double timestep);
+
+#ifdef HAVE_PARALLEL_DOMAIN_AMDIS
+    void initParallel();
+#endif
 
     /// Estimates the error on an element. For more information about the
     /// parameter, see the description \ref Estimator::estimateElement.
-    virtual void estimateElement(ElInfo *elInfo, DualElInfo *dualElInfo = NULL);
+    void estimateElement(ElInfo *elInfo, DualElInfo *dualElInfo = NULL);
 
-    virtual void exit(bool output = true);
+    void exit(bool output = true);
 
   protected:
     /// Computes the element residual for a given element.
diff --git a/AMDiS/src/est/SimpleResidualEstimator.cc b/AMDiS/src/est/SimpleResidualEstimator.cc
index e36b03203f37c990848cd0a152e786d68cd6e412..0635e87b64ad7dc963cc5debc913298e171f7f79 100644
--- a/AMDiS/src/est/SimpleResidualEstimator.cc
+++ b/AMDiS/src/est/SimpleResidualEstimator.cc
@@ -230,7 +230,6 @@ namespace AMDiS {
 
     // === Init temporary variables. ===
     double result = 0.0;
-    int dow = Global::getGeo(WORLD);
     Element *el = elInfo->getElement();
     const DimVec<WorldVector<double> > &grdLambda = elInfo->getGrdLambda();
     double det = elInfo->getDet();
@@ -251,16 +250,13 @@ namespace AMDiS {
       el->sortFaceIndices(face, faceIndEl);
       neigh->sortFaceIndices(oppV, faceIndNeigh);	
       neighInfo->setElement(const_cast<Element*>(neigh));
-      neighInfo->setFillFlag(Mesh::FILL_COORDS);
-      
-      for (int i = 0; i < dow; i++)
-	neighInfo->getCoord(oppV)[i] = elInfo->getOppCoord(face)[i];
+      neighInfo->setFillFlag(Mesh::FILL_COORDS);     
+      neighInfo->getCoord(oppV) = elInfo->getOppCoord(face);
       
       for (int i = 0; i < dim; i++) {
 	int i1 = faceIndEl[i];
 	int i2 = faceIndNeigh[i];
-	for (int j = 0; j < dow; j++)
-	  neighInfo->getCoord(i2)[j] = elInfo->getCoord(i1)[j];
+	neighInfo->getCoord(i2) = elInfo->getCoord(i1);
       }      
 	
       // Compute determinant of the neighbouring element.
diff --git a/AMDiS/src/parallel/StdMpi.cc b/AMDiS/src/parallel/StdMpi.cc
index 4bde99ca70dd3ab5537c72e0784a6125c40c9433..f02930f11c96ca5360d7401c3b09676f35e75cfc 100644
--- a/AMDiS/src/parallel/StdMpi.cc
+++ b/AMDiS/src/parallel/StdMpi.cc
@@ -26,7 +26,7 @@ namespace AMDiS {
   MPI_Datatype StdMpiHelper<vector<std::pair<int, int> > >::mpiDataType = MPI_INT;
   MPI_Datatype StdMpiHelper<vector<WorldVector<double> > >::mpiDataType = MPI_DOUBLE;
   MPI_Datatype StdMpiHelper<map<WorldVector<double>, int> >::mpiDataType = MPI_DOUBLE;
-
+  MPI_Datatype StdMpiHelper<vector<vector<WorldVector<double> > > >::mpiDataType = MPI_DOUBLE;
 
   // T = int
 
@@ -449,4 +449,48 @@ namespace AMDiS {
     }
   }
 
+
+  // T = vector<vector<WorldVector<double> > >
+
+  int StdMpiHelper<vector<vector<WorldVector<double> > > >::getBufferSize(vector<vector<WorldVector<double> > > &data)
+  {
+    int result = 1;
+
+    for (unsigned int i = 0; i < data.size(); i++)
+      result += 1 + (data[i].size() * Global::getGeo(WORLD));
+
+    return result;
+  }
+
+  void StdMpiHelper<vector<vector<WorldVector<double> > > >::createBuffer(vector<vector<WorldVector<double> > > &data, 
+									  double* buf)
+  {
+    int counter = 0;
+
+    buf[counter++] = static_cast<double>(data.size());
+
+    for (unsigned int i = 0; i < data.size(); i++) {
+      buf[counter++] = data[i].size();
+      for (unsigned int j = 0; j < data[i].size(); j++)
+	for (unsigned int k = 0; k < Global::getGeo(WORLD); k++) 
+	  buf[counter++] = data[i][j][k];
+    }
+  }
+
+  void StdMpiHelper<vector<vector<WorldVector<double> > > >::makeFromBuffer(vector<vector<WorldVector<double> > > &data, 
+									    double* buf, int bufSize)
+  {
+    int counter = 0;
+    data.resize(static_cast<int>(buf[counter++]));
+
+    for (unsigned int i = 0; i < data.size(); i++) {
+      data[i].resize(static_cast<int>(buf[counter++]));
+      for (unsigned int j = 0; j < data[i].size(); j++) 
+	for (unsigned int k = 0; k < Global::getGeo(WORLD); k++) 
+	  data[i][j][k] = buf[counter++];
+    }
+
+    TEST_EXIT_DBG(counter == bufSize)("There is something very wrong!\n");
+  }
+
 }
diff --git a/AMDiS/src/parallel/StdMpi.h b/AMDiS/src/parallel/StdMpi.h
index e7cadce63f077fb8f82f44b1c3687de9c2ee028d..0800cacac6e2a034c81d629f25084ceb136be84a 100644
--- a/AMDiS/src/parallel/StdMpi.h
+++ b/AMDiS/src/parallel/StdMpi.h
@@ -229,6 +229,22 @@ namespace AMDiS {
 
 
 
+  template<>
+  struct StdMpiHelper<vector<vector<WorldVector<double> > > > {
+    static MPI_Datatype mpiDataType;
+
+    typedef double cppDataType;
+
+    typedef vector<vector<WorldVector<double> > >  DataType;
+
+    static int getBufferSize(DataType &data);
+
+    static void createBuffer(DataType &data, double* buf);
+
+    static void makeFromBuffer(DataType &data, double* buf, int bufSize);
+  };
+
+
   /** \brief
    * This class is used to easily send and receive STL containers using MPI.
    */
@@ -302,14 +318,14 @@ namespace AMDiS {
 
 
     /// Returns sending data, see \ref sendData.
-    map<int, SendT>& getSendData()
+    inline map<int, SendT>& getSendData()
     {
       return sendData;
     }
 
 
     /// Returns the data that should be send to a specific rank, see \ref sendData.
-    SendT& getSendData(int rank)
+    inline SendT& getSendData(int rank)
     {
       return sendData[rank];
     }