diff --git a/AMDiS/src/DOFMatrix.cc b/AMDiS/src/DOFMatrix.cc
index 6104d8ee28a9fe9ca219d8b8c9c9e38dec0ef5d1..dbd95c9464a6f6a0c1ebc477336d2cb7a0c66f0f 100644
--- a/AMDiS/src/DOFMatrix.cc
+++ b/AMDiS/src/DOFMatrix.cc
@@ -238,10 +238,10 @@ namespace AMDiS {
 	if (condition->applyBoundaryCondition()) {
 
 #ifdef HAVE_PARALLEL_DOMAIN_AMDIS
-	  // 	  if (dofMap->isRankDof(rowIndices[i])) {
+	   	  if (dofMap->isRankDof(rowIndices[i])) {
  	    applyDBCs.insert(row);
 	    //	    dirichletDofs.push_back(row);
-	    //	  }
+	    	  }
 #else
  	  applyDBCs.insert(row);
 #endif
@@ -496,13 +496,13 @@ namespace AMDiS {
     // Do the following only in sequential code. In parallel mode, the specific
     // solver method must care about dirichlet boundary conditions.
 
-#ifndef HAVE_PARALLEL_DOMAIN_AMDIS
+    //#ifndef HAVE_PARALLEL_DOMAIN_AMDIS
     inserter_type &ins = *inserter;  
     for (std::set<int>::iterator it = rows->begin(); it != rows->end(); ++it)
       ins[*it][*it] = 1.0;    
 
     rows->clear();
-#endif
+    //#endif
   }
 
 
diff --git a/AMDiS/src/DOFVector.cc b/AMDiS/src/DOFVector.cc
index e05c35a45c5eca0519368ae1a97164cbdba7ded8..601e4ede5df8ad2d35456a9068b8b2f2ed18461e 100644
--- a/AMDiS/src/DOFVector.cc
+++ b/AMDiS/src/DOFVector.cc
@@ -664,13 +664,13 @@ namespace AMDiS {
       std::vector<Operator*>::iterator it;
       std::vector<double*>::iterator factorIt;
 
-      for (it = this->operators.begin(), factorIt = this->operatorFactor.begin();	
+      for (it = this->operators.begin(), factorIt = this->operatorFactor.begin();
 	   it != this->operators.end(); 
 	   ++it, ++factorIt)
 	if ((*it)->getNeedDualTraverse() == false) {
 	  (*it)->getElementVector(elInfo, this->elementVector, 
 				  *factorIt ? **factorIt : 1.0);      
-	  addVector = true;
+	  addVector = true;	  
 	}
     }
 
diff --git a/AMDiS/src/DOFVector.h b/AMDiS/src/DOFVector.h
index d334ab56116745e8bfe735d28d6b2e49166a61bb..993f92b6fc17b8e821849d264c69cccc20841d76 100644
--- a/AMDiS/src/DOFVector.h
+++ b/AMDiS/src/DOFVector.h
@@ -194,12 +194,10 @@ namespace AMDiS {
 
     Flag getAssembleFlag();
 
-    /** \brief
-     * Evaluates \f[ u_h(x(\lambda)) = \sum_{i=0}^{m-1} vec[ind[i]] * 
-     * \varphi^i(\lambda) \f] where \f$ \varphi^i \f$ is the i-th basis function,
-     * \f$ x(\lambda) \f$ are the world coordinates of lambda and
-     * \f$ m \f$ is the number of basis functions
-     */
+    /// Evaluates \f[ u_h(x(\lambda)) = \sum_{i=0}^{m-1} vec[ind[i]] * 
+    /// \varphi^i(\lambda) \f] where \f$ \varphi^i \f$ is the i-th basis
+    /// function, \f$ x(\lambda) \f$ are the world coordinates of lambda
+    /// and \f$ m \f$ is the number of basis functions
     T evalUh(const DimVec<double>& lambda, DegreeOfFreedom* ind);
 
     inline vector<Operator*>& getOperators() 
diff --git a/AMDiS/src/DOFVector.hh b/AMDiS/src/DOFVector.hh
index 15085ee91fc07fb259592f391ae590b0784ecf09..48aeaf7a1b9aa54cac4e38b9b87a3b5c44524815 100644
--- a/AMDiS/src/DOFVector.hh
+++ b/AMDiS/src/DOFVector.hh
@@ -142,7 +142,8 @@ namespace AMDiS {
     FUNCNAME("DOFVector::addElementVector()");
 
     std::vector<DegreeOfFreedom> indices(nBasFcts);
-    feSpace->getBasisFcts()->getLocalIndices(elInfo->getElement(), feSpace->getAdmin(),
+    feSpace->getBasisFcts()->getLocalIndices(elInfo->getElement(), 
+					     feSpace->getAdmin(),
 					     indices);
 
     for (DegreeOfFreedom i = 0; i < nBasFcts; i++) {
@@ -155,7 +156,7 @@ namespace AMDiS {
 	if (add)
 	  (*this)[irow] += factor * elVec[i];
 	else  
-	  (*this)[irow] = factor * elVec[i];	
+	  (*this)[irow] = factor * elVec[i];
       }
     }
   }
@@ -1205,10 +1206,36 @@ namespace AMDiS {
        x.getFeSpace()->getAdmin(), result.getFeSpace()->getAdmin());
 
 
-    if (transpose == NoTranspose)
-      mult(a.getBaseMatrix(), x, result);
-    else if (transpose == Transpose)
-      mult(trans(const_cast<DOFMatrix::base_matrix_type&>(a.getBaseMatrix())), x, result);
+    if (transpose == NoTranspose) {
+      mtl::dense_vector<T> tmp_x(x.getUsedSize());
+      mtl::dense_vector<T> tmp_result(result.getUsedSize());
+
+      {
+	int counter = 0;
+	typename DOFVector<T>::Iterator it(const_cast<DOFVector<T>*>(&x), USED_DOFS);
+	for (it.reset(); !it.end(); ++it)
+	  tmp_x[counter++] = *it;
+      }
+
+      mult(a.getBaseMatrix(), tmp_x, tmp_result);
+
+      {
+	int counter = 0;
+	typename DOFVector<T>::Iterator it(&result, USED_DOFS);
+	for (it.reset(); !it.end(); ++it)
+	  if (add)
+	    *it += tmp_result[counter++];
+	  else
+	    *it = tmp_result[counter++];
+      }
+
+
+    } else if (transpose == Transpose) {
+      ERROR_EXIT("Not yet implemented!\n");
+
+//       mult(trans(const_cast<DOFMatrix::base_matrix_type&>(a.getBaseMatrix())),
+// 	   x, result);
+    }
     else 
       ERROR_EXIT("transpose = %d\n", transpose);
   }
diff --git a/AMDiS/src/Debug.h b/AMDiS/src/Debug.h
index eeda743f3df715a601ca1c1e21ee7f0018c94be0..a48fbae50e3604c50a1fda42b2b337360a4bfd11 100644
--- a/AMDiS/src/Debug.h
+++ b/AMDiS/src/Debug.h
@@ -78,15 +78,18 @@ namespace AMDiS {
 			 std::string filename);
 
     /** \brief
-     * Creates a vtu file where all elements in the mesh are colored by the global
-     * element indices.
+     * Creates a vtu file where all elements in the mesh are colored by the
+     * global element indices.
      *
      * \param[in]  feSpace   The FE space to be used.
      * \param[in]  filename  Name of the file.
-     * \param[in]  level     If level is -1, all leaf elements will be put to the
-     *                       output file, otherwise the elements with the given level.
+     * \param[in]  level     If level is -1, all leaf elements will be put to
+     *                       the output file, otherwise the elements with the
+     *                       given level.
      */
-    void writeElementIndexMesh(Mesh *mesh, std::string filename, int level = -1);
+    void writeElementIndexMesh(Mesh *mesh, 
+			       std::string filename, 
+			       int level = -1);
 
     void highlightElementIndexMesh(Mesh *mesh, int idx, std::string filename);
 
diff --git a/AMDiS/src/DirichletBC.cc b/AMDiS/src/DirichletBC.cc
index a5f5ec79e2fe4d5d572d067317e4e341f0b7bb22..ce9a8fdb5954d72b0179c49fac24a54a75dae064 100644
--- a/AMDiS/src/DirichletBC.cc
+++ b/AMDiS/src/DirichletBC.cc
@@ -65,7 +65,7 @@ namespace AMDiS {
 
     for (int i = 0; i < nBasFcts; i++) {
 #ifdef HAVE_PARALLEL_DOMAIN_AMDIS
-      //      if (vector->isRankDof(dofIndices[i]))
+      if (vector->isRankDof(dofIndices[i]))
 #endif
       if (localBound[i] == boundaryType) {
 	double value = 0.0;
@@ -76,11 +76,11 @@ namespace AMDiS {
 	if (dofVec)
 	  value = (*dofVec)[dofIndices[i]];
 
-#ifdef HAVE_PARALLEL_DOMAIN_AMDIS
-	vector->setDirichletDofValue(dofIndices[i], value);
-#else
+// #ifdef HAVE_PARALLEL_DOMAIN_AMDIS
+// 	vector->setDirichletDofValue(dofIndices[i], value);
+// #else
 	(*vector)[dofIndices[i]] = value;
-#endif
+	//#endif
       }
     }
   }
diff --git a/AMDiS/src/FirstOrderAssembler.h b/AMDiS/src/FirstOrderAssembler.h
index 9f9a4347706798ca3ccfbbb3fe00b2b7da5e71d3..298522537ae005ca712722e9fef513dce202fad8 100644
--- a/AMDiS/src/FirstOrderAssembler.h
+++ b/AMDiS/src/FirstOrderAssembler.h
@@ -37,13 +37,11 @@ namespace AMDiS {
   class FirstOrderAssembler : public SubAssembler
   {
   public:
-    /** \brief
-     * Creates and returns the FirstOrderAssembler for Operator op and
-     * the given assembler. If all terms are piecewise constant precalculated 
-     * integrals can be used while assembling and the returned 
-     * ZeroOrderAssembler is of type Pre0. Otherwise a Quad0 object will
-     * be returned.
-     */
+    /// Creates and returns the FirstOrderAssembler for Operator op and
+    /// the given assembler. If all terms are piecewise constant precalculated 
+    /// integrals can be used while assembling and the returned 
+    /// ZeroOrderAssembler is of type Pre0. Otherwise a Quad0 object will
+    /// be returned.
     static FirstOrderAssembler* getSubAssembler(Operator *op,
 						Assembler *assembler,
 						Quadrature *quadrat,
diff --git a/AMDiS/src/FirstOrderTerm.h b/AMDiS/src/FirstOrderTerm.h
index 51bf445add055d5ee67b43f4508e2221d15bea22..01bafb942671515a3ece3ec1fdd6eba1b5e98f2e 100644
--- a/AMDiS/src/FirstOrderTerm.h
+++ b/AMDiS/src/FirstOrderTerm.h
@@ -76,10 +76,8 @@ namespace AMDiS {
     }
 
   protected:
-    /** \brief
-     * Evaluation of \f$ \Lambda \cdot b\f$ if b contains the value 1.0 in 
-     * each component.
-     */
+    /// Evaluation of \f$ \Lambda \cdot b\f$ if b contains the value 1.0
+    /// in each component.
     inline void l1(const DimVec<WorldVector<double> >& Lambda,
 		   mtl::dense_vector<double>& Lb,
 		   double factor) const
diff --git a/AMDiS/src/Operator.h b/AMDiS/src/Operator.h
index c970c317f6562e834502c45bce06702657866a02..5e37864c18c356a17ba2e6f3508fc170c70b2e19 100644
--- a/AMDiS/src/Operator.h
+++ b/AMDiS/src/Operator.h
@@ -100,10 +100,8 @@ namespace AMDiS {
     /// Adds a SecondOrderTerm to the Operator
     void addTerm(SecondOrderTerm *term);
 
-    /** \brief
-     * Calculates the element matrix for this ElInfo and adds it multiplied by
-     * factor to userMat.
-     */
+    /// Calculates the element matrix for this ElInfo and adds it multiplied by
+    /// factor to userMat.
     virtual void getElementMatrix(const ElInfo *elInfo, 
 				  ElementMatrix& userMat, 
 				  double factor = 1.0);
@@ -116,10 +114,8 @@ namespace AMDiS {
 				  ElementMatrix& userMat,				  
 				  double factor = 1.0);
 
-    /** \brief
-     * Calculates the element vector for this ElInfo and adds it multiplied by
-     * factor to userVec.
-     */
+    /// Calculates the element vector for this ElInfo and adds it multiplied by
+    /// factor to userVec.
     virtual void getElementVector(const ElInfo *elInfo, 
 				  ElementVector& userVec, 
 				  double factor = 1.0);
diff --git a/AMDiS/src/OperatorTerm.h b/AMDiS/src/OperatorTerm.h
index d9f0d4c40f0d699821aa972baa4ae8a99f842b21..1bfa57f8de70d6a8f095501c13c9188fe069e9da 100644
--- a/AMDiS/src/OperatorTerm.h
+++ b/AMDiS/src/OperatorTerm.h
@@ -107,10 +107,8 @@ namespace AMDiS {
 		      mtl::dense_vector<double>& result,
 		      double factor) = 0;
 
-    /** \brief
-     * Determines the value of a dof vector at the quadrature points of a given 
-     * element. It is used by all VecAtQP like operator terms.
-     */
+    /// Determines the value of a dof vector at the quadrature points of a given 
+    /// element. It is used by all VecAtQP like operator terms.
     template<typename T>
     void getVectorAtQPs(DOFVectorBase<T>* vec,
 			const ElInfo* elInfo, 
@@ -118,12 +116,11 @@ namespace AMDiS {
 			Quadrature *quad,
 			mtl::dense_vector<T>& vecAtQPs);
 
-    /** \brief
-     * Determines the value of a dof vector at the quadrature points of a given 
-     * element. This function is used, if an operator is assembled on two different
-     * meshes using the dual traverse. The element, i.e. the small or the large one,
-     * is choosen, which corresponds to the mesh the dof vector is defined on.
-     */
+    /// Determines the value of a dof vector at the quadrature points of a given 
+    /// element. This function is used, if an operator is assembled on two
+    /// different meshes using the dual traverse. The element, i.e. the small or
+    /// the large one, is choosen, which corresponds to the mesh the dof vector 
+    /// is defined on.
     template<typename T>
     void getVectorAtQPs(DOFVectorBase<T>* vec,
 			const ElInfo* smallElInfo, 
@@ -165,13 +162,11 @@ namespace AMDiS {
     /// Pointer to the Operator this OperatorTerm belongs to.
     Operator* operat;
 
-    /** \brief
-     * In many cases, the vector b in the evaluation \f$ \Lambda \cdot b\f$ has zeros
-     * in all components expect one that is set to one. Using the function \ref lb is
-     * then unnecessary time consuming. Instead, this variable defines the component
-     * of the vector b to be one. The function \ref lb_one is used if this variable is
-     * not -1.
-     */
+    /// In many cases, the vector b in the evaluation \f$ \Lambda \cdot b\f$ has
+    /// zeros in all components expect one that is set to one. Using the function
+    /// \ref lb is then unnecessary time consuming. Instead, this variable
+    /// defines the component of the vector b to be one. The function \ref lb_one
+    /// is used if this variable is not -1.
     int bOne;
 
     /// Flag for piecewise constant terms
diff --git a/AMDiS/src/ProblemStat.cc b/AMDiS/src/ProblemStat.cc
index a52db0219ef0ee70b0966108a4ddb57e53fae45d..a010db43d7d146c2f245933cb6a51317314ec56f 100644
--- a/AMDiS/src/ProblemStat.cc
+++ b/AMDiS/src/ProblemStat.cc
@@ -1674,7 +1674,13 @@ namespace AMDiS {
     // == private matrix and vector to the global one.                         ==
 
     if (matrix)
-      matrix->removeRowsWithDBC(matrix->getApplyDBCs());      
+      matrix->removeRowsWithDBC(matrix->getApplyDBCs());
+
+    if (matrix)
+      matrix->finishAssembling();
+
+    if (vector)
+      vector->finishAssembling();
 
     if (useGetBound)
       delete [] bound;     
diff --git a/AMDiS/src/SubAssembler.hh b/AMDiS/src/SubAssembler.hh
index 7cc922a8217a21d83a109db55245c9a4c0ab2695..7dd43945270948ea2231634d5640da5dcfc82094 100644
--- a/AMDiS/src/SubAssembler.hh
+++ b/AMDiS/src/SubAssembler.hh
@@ -74,7 +74,6 @@ namespace AMDiS {
     cachedValuesAtQPs[vec]->valid = true;
     cachedValuesAtQPs[vec]->quad = localQuad;
     vecAtQPs = values;
-
   }
 
 
diff --git a/AMDiS/src/SystemVector.h b/AMDiS/src/SystemVector.h
index e7bae72e2122fb0d0db0af5b282f870bb43c39ef..473167777806bbd903571b49411e3bf0fdb78efb 100644
--- a/AMDiS/src/SystemVector.h
+++ b/AMDiS/src/SystemVector.h
@@ -367,8 +367,7 @@ namespace AMDiS {
 		 bool               add = false)
   {
     FUNCNAME("mv()");
-    TEST_EXIT(false)("This function is not supported any more.\n");
-#if 0
+
     int size = x.getNumVectors();
     int i;
 
@@ -388,7 +387,6 @@ namespace AMDiS {
 		     *(result.getDOFVector(i)),
 		     true);
     }
-#endif
   }
 
   /// y = a*x + y;
diff --git a/AMDiS/src/est/RecoveryEstimator.cc b/AMDiS/src/est/RecoveryEstimator.cc
index e33b70c098feb77e40eb271de07a04a8ed81de10..f24e3c2f737078318ff233a87efe8f942cbc31c1 100644
--- a/AMDiS/src/est/RecoveryEstimator.cc
+++ b/AMDiS/src/est/RecoveryEstimator.cc
@@ -45,10 +45,12 @@ namespace AMDiS {
       }
     } else {
       degree = uh_->getFeSpace()->getBasisFcts()->getDegree() + 1;    
-      feSpace = FiniteElemSpace::provideFeSpace(NULL,
-						Lagrange::getLagrange(uh_->getFeSpace()->getMesh()->getDim(), degree),
-						uh_->getFeSpace()->getMesh(),
-						name + "->feSpace");
+      feSpace = 
+	FiniteElemSpace::provideFeSpace(NULL,
+					Lagrange::getLagrange(uh_->getFeSpace()->getMesh()->getDim(),
+							      degree),
+					uh_->getFeSpace()->getMesh(),
+					name + "->feSpace");
 
       if (method == 2) {
 	ERROR("Simple averaging only for the H1_NORM; using SPR instead\n");
diff --git a/AMDiS/src/parallel/BddcMlSolver.cc b/AMDiS/src/parallel/BddcMlSolver.cc
index eeac4bebd8591fa35a1e09ad58472db03e8e6d3a..682ba3daf383862dde3a2517cae545a89583e3e2 100644
--- a/AMDiS/src/parallel/BddcMlSolver.cc
+++ b/AMDiS/src/parallel/BddcMlSolver.cc
@@ -340,7 +340,7 @@ namespace AMDiS {
 
     int method = 1;
     double tol = 1.e-6;
-    int maxit = 10;
+    int maxit = 1000;
     int ndecrmax = 30;
     int num_iter = 0;
     int converged_reason = 0;
diff --git a/AMDiS/src/parallel/InteriorBoundary.cc b/AMDiS/src/parallel/InteriorBoundary.cc
index f907b32cc437db8502169e101126cb3e620e5880..4140ede0f64de6a592aa491b24cf9bbba3eecd8e 100644
--- a/AMDiS/src/parallel/InteriorBoundary.cc
+++ b/AMDiS/src/parallel/InteriorBoundary.cc
@@ -222,7 +222,7 @@ namespace AMDiS {
 	bound.neighObj.ithObj = perEdgeEl1.ithObject;
 	
 	bound.type = it->second;
-	
+
 	AtomicBoundary& b = getNewPeriodic(otherElementRank);
 	b = bound;
      
diff --git a/AMDiS/src/parallel/MeshDistributor.cc b/AMDiS/src/parallel/MeshDistributor.cc
index aae499703c286b5ccc0f045a121f08226027aa67..4d9047df5fbc6dae662ac5a980b2f5ecfd26f8e8 100644
--- a/AMDiS/src/parallel/MeshDistributor.cc
+++ b/AMDiS/src/parallel/MeshDistributor.cc
@@ -1209,11 +1209,14 @@ namespace AMDiS {
   void MeshDistributor::repartitionMesh()
   {
     FUNCNAME("MeshDistributor::repartitionMesh()");
-
+    
     // === First we check if the rank with the maximum number of DOFs has at  ===
     // === least 20% more DOFs than the rank with the minimum number of DOFs. ===
     // === In this case, the mesh will be repartition.                        ===
 
+    double inbalanceFactor = 1.2;
+    Parameters::get("parallel->repartitioning->inbalance", inbalanceFactor);
+
     int repartitioning = 0;
     vector<int> nDofsInRank(mpiSize);
     int nDofs = mesh->getDofAdmin(0).getUsedDofs();
@@ -1232,7 +1235,8 @@ namespace AMDiS {
       MSG("Overall DOFs: %d    Min DOFs: %d    Max DOFs: %d\n", 
 	  nOverallDofs, minDofs, maxDofs);
 
-      if (static_cast<double>(maxDofs) / static_cast<double>(minDofs) > 1.2) 
+      if (static_cast<double>(maxDofs) / static_cast<double>(minDofs) > 
+	 inbalanceFactor) 
 	repartitioning = 1;
 
       mpiComm.Bcast(&repartitioning, 1, MPI_INT, 0);
@@ -1245,13 +1249,18 @@ namespace AMDiS {
 
     double timePoint = MPI::Wtime();
 
+
 #if (DEBUG != 0)
     ParallelDebug::testDoubleDofs(mesh);
-
-    if (repartitioningCounter == 0)
+    int writePartMesh = 1;
+#else
+    int writePartMesh = 0;
+#endif
+    Parameters::get("dbg->write part mesh", writePartMesh);
+    if (writePartMesh > 0 && repartitioningCounter == 0)
       ParallelDebug::writePartitioningFile(debugOutputDir + "partitioning", 
 					   repartitioningCounter, feSpaces[0]);
-#endif
+
     repartitioningCounter++;
 
     // === Create new element weights. ===
@@ -1696,6 +1705,10 @@ namespace AMDiS {
     ParallelDebug::testCommonDofs(*this, true);
     ParallelDebug::testGlobalIndexByCoords(*this);   
 #else
+    for (unsigned int i = 0; i < feSpaces.size(); i++)
+      MSG("FE space %d:  nRankDofs    = %d   nOverallDofs = %d\n", 
+	  i, dofMap[feSpaces[i]].nRankDofs, dofMap[feSpaces[i]].nOverallDofs);
+
     int tmp = 0;
     Parameters::get(name + "->write parallel debug file", tmp);
     if (tmp)
@@ -1816,7 +1829,7 @@ namespace AMDiS {
 	  for (unsigned int i = 0; i < (dofs.size() - nDofs); i++)
 	    rankToDofType[it->first].push_back(boundIt->type);
 	}
-	
+
 	// Send the global indices to the rank on the other side.
 	stdMpi.getSendData(it->first).reserve(dofs.size());
 	for (unsigned int i = 0; i < dofs.size(); i++)
diff --git a/AMDiS/src/parallel/ParallelDofMapping.cc b/AMDiS/src/parallel/ParallelDofMapping.cc
index 21d16e390a17527d36d8eed370ff47b1ad862df1..3475ada4fcee37b6a4c999e3ff2836b33a102b11 100644
--- a/AMDiS/src/parallel/ParallelDofMapping.cc
+++ b/AMDiS/src/parallel/ParallelDofMapping.cc
@@ -17,6 +17,26 @@ namespace AMDiS {
 
   using namespace std;
 
+
+  void DofToMatIndex::getReverse(int rowIndex, int &component, int &dofIndex)
+  {
+    FUNCNAME("DofToMatIndex::getReverse()");
+
+    for (map<int, map<DegreeOfFreedom, int> >::iterator it0 = data.begin();
+	 it0 != data.end(); ++it0)
+      for (map<DegreeOfFreedom, int>::iterator it1 = it0->second.begin();
+	   it1 != it0->second.end(); ++it1)
+	if (it1->second == rowIndex) {
+	  component = it0->first;
+	  dofIndex = it1->first;
+	  return;
+	}
+  
+    component = -1;
+    dofIndex = -1;
+  }
+
+
   void FeSpaceDofMap::clear()
   {
     dofMap.clear();
diff --git a/AMDiS/src/parallel/ParallelDofMapping.h b/AMDiS/src/parallel/ParallelDofMapping.h
index e8ff50e6f0adc883b00c4245a001ccdebb2438f3..f24c605403a987f1d09bedd61c3127eb68b54a19 100644
--- a/AMDiS/src/parallel/ParallelDofMapping.h
+++ b/AMDiS/src/parallel/ParallelDofMapping.h
@@ -80,6 +80,11 @@ namespace AMDiS {
       return data[component][dof];
     }
 
+    /// Returns for a given matrix index the component and (local or global) DOF
+    /// index. As the data structure is not made for this kind of reverse
+    /// search, this is very slow and should be only used for debugging.
+    void getReverse(int rowIndex, int &component, int &dofIndex);
+
   private:
     /// The mapping data. For each system component there is a specific map that
     /// maps global DOF indices to global matrix indices.
@@ -375,6 +380,13 @@ namespace AMDiS {
       return dofToMatIndex.get(ithComponent, d);
     }
 
+    /// Returns the component number and local/global DOF index for a given
+    /// matrix row index. Should be used for debugging only!
+    inline void getReverseMatIndex(int index, int &component, int &dofIndex)
+    {
+      dofToMatIndex.getReverse(index, component, dofIndex);
+    }
+
     /// Returns the local matrix index of a given DOF for a given 
     /// component number.
     inline int getLocalMatIndex(int ithComponent, DegreeOfFreedom d)
diff --git a/AMDiS/src/parallel/PetscSolver.h b/AMDiS/src/parallel/PetscSolver.h
index 231a9203046edd6e2191ce0e80950cbaaa1aa540..f6702593b510aed0ddffd4c927e9494775142354 100644
--- a/AMDiS/src/parallel/PetscSolver.h
+++ b/AMDiS/src/parallel/PetscSolver.h
@@ -121,6 +121,12 @@ namespace AMDiS {
       removeRhsNullSpace = b;
     }
 
+    /// Adds a new vector to the basis of the operator's nullspace.
+    void addNullspaceVector(SystemVector *vec)
+    {
+      nullspace.push_back(vec);
+    }
+
     inline bool isCoarseSpace(const FiniteElemSpace *feSpace, 
 			      DegreeOfFreedom dof)
     {
@@ -239,6 +245,9 @@ namespace AMDiS {
     /// PETSc preconditioner object
     PC pcInterior;
 
+    /// A set of vectors that span the null space of the operator.
+    vector<SystemVector*> nullspace;
+
     /// KSP database prefix
     string kspPrefix;
 
diff --git a/AMDiS/src/parallel/PetscSolverFeti.cc b/AMDiS/src/parallel/PetscSolverFeti.cc
index c7de58a86fd0e3281bb60b05036c2860b696cdd6..66010054e068863d42851fffb4cbecda649ad070 100644
--- a/AMDiS/src/parallel/PetscSolverFeti.cc
+++ b/AMDiS/src/parallel/PetscSolverFeti.cc
@@ -1428,7 +1428,7 @@ namespace AMDiS {
     FUNCNAME("PetscSolverFeti::solveReducedFetiMatrix()");
 
     // RHS vector.
-    Vec vec_rhs;
+    Vec vec_rhs, vec_sol;
 
     // Some temporary vectors.
     Vec tmp_b0, tmp_b1, tmp_lagrange0, tmp_primal0, tmp_primal1;
@@ -1450,6 +1450,7 @@ namespace AMDiS {
 
     MatGetVecs(mat_lagrange, PETSC_NULL, &tmp_lagrange0);
     MatGetVecs(mat_lagrange, PETSC_NULL, &vec_rhs);
+    MatGetVecs(mat_lagrange, PETSC_NULL, &vec_sol);
 
     // === Create new rhs ===
 
@@ -1486,7 +1487,7 @@ namespace AMDiS {
 
 
     // === Solve with FETI-DP operator. ===
-    KSPSolve(ksp_feti, vec_rhs, vec_rhs);
+    KSPSolve(ksp_feti, vec_rhs, vec_sol);
 
 
     if (printTimings) {
@@ -1506,7 +1507,7 @@ namespace AMDiS {
     subdomain->solveGlobal(subdomain->getRhsInterior(), tmp_b0);
     MatMult(subdomain->getMatCoarseInt(), tmp_b0, tmp_primal1);
     VecAXPBY(tmp_primal0, -1.0, 1.0, tmp_primal1);
-    MatMultTranspose(mat_lagrange, vec_rhs, tmp_b0);
+    MatMultTranspose(mat_lagrange, vec_sol, tmp_b0);
     subdomain->solveGlobal(tmp_b0, tmp_b0);
     MatMult(subdomain->getMatCoarseInt(), tmp_b0, tmp_primal1);
 
@@ -1517,7 +1518,7 @@ namespace AMDiS {
     // === Solve for u_b. ===
 
     VecCopy(subdomain->getRhsInterior(), tmp_b0);
-    MatMultTranspose(mat_lagrange, vec_rhs, tmp_b1);
+    MatMultTranspose(mat_lagrange, vec_sol, tmp_b1);
     VecAXPBY(tmp_b0, -1.0, 1.0, tmp_b1);
 
     MatMult(subdomain->getMatIntCoarse(), tmp_primal0, tmp_b1);
@@ -1534,6 +1535,7 @@ namespace AMDiS {
     }
 
     VecDestroy(&vec_rhs);
+    VecDestroy(&vec_sol);
     VecDestroy(&tmp_b0);
     VecDestroy(&tmp_b1);
     VecDestroy(&tmp_lagrange0);
diff --git a/AMDiS/src/parallel/PetscSolverGlobalMatrix.cc b/AMDiS/src/parallel/PetscSolverGlobalMatrix.cc
index d7b3d3ac81603e6d190afc8a7ecd9564e6cfe893..754aa2ac753e6e3d77587250c167d3536d379cb8 100644
--- a/AMDiS/src/parallel/PetscSolverGlobalMatrix.cc
+++ b/AMDiS/src/parallel/PetscSolverGlobalMatrix.cc
@@ -105,7 +105,7 @@ namespace AMDiS {
 
     // === Remove Dirichlet BC DOFs. ===
 
-    removeDirichletBcDofs(mat);
+    //    removeDirichletBcDofs(mat);
     
 
     // === Init PETSc solver. ===
@@ -305,7 +305,7 @@ namespace AMDiS {
 
     // === Remove Dirichlet BC DOFs. ===
 
-    removeDirichletBcDofs(mat);
+    //    removeDirichletBcDofs(mat);
 
     // === Create solver for the non primal (thus local) variables. ===
 
@@ -352,11 +352,6 @@ namespace AMDiS {
 	setDofVector(rhsInterior, vec->getDOFVector(i), i);
     }
 
-
-    // === Remove Dirichlet BC DOFs. ===
-    removeDirichletBcDofs(vec);
-
-
     VecAssemblyBegin(rhsInterior);
     VecAssemblyEnd(rhsInterior);
 
@@ -365,7 +360,11 @@ namespace AMDiS {
       VecAssemblyEnd(rhsCoarseSpace);
     }
 
-   
+
+    // === Remove Dirichlet BC DOFs. ===
+    //    removeDirichletBcDofs(vec);
+
+  
     // === Remove null space, if requested. ===
 
     if (removeRhsNullSpace) {
@@ -398,16 +397,47 @@ namespace AMDiS {
       VecAssemblyEnd(petscSolVec);
     }
 
+    MatNullSpace matNullSpace;
+    Vec nullspaceBasis;
+
+    if (nullspace.size() > 0) {
+      TEST_EXIT_DBG(nullspace.size() == 1)("Not yet implemented!\n");
+
+      VecDuplicate(petscSolVec, &nullspaceBasis);
+
+      for (int i = 0; i < nComponents; i++)
+	setDofVector(nullspaceBasis, nullspace[0]->getDOFVector(i), i, true);
+
+      VecAssemblyBegin(nullspaceBasis);
+      VecAssemblyEnd(nullspaceBasis);
+
+      MatNullSpaceCreate(mpiCommGlobal, PETSC_FALSE, 1, &nullspaceBasis, &matNullSpace);
+      KSPSetNullSpace(kspInterior, matNullSpace);
+
+      MatMult(matIntInt, nullspaceBasis, petscSolVec);
+      PetscReal n;
+      VecNorm(petscSolVec, NORM_2, &n);
+      MSG("NORM IS: %e\n", n);
+    }
+
     // PETSc.
     solve(rhsInterior, petscSolVec);
 
+
+    if (nullspace.size() > 0) {
+      MatNullSpaceDestroy(&matNullSpace);
+      VecDestroy(&nullspaceBasis);
+    }
+
+
     // === Transfere values from PETSc's solution vectors to the DOF vectors. ===
     PetscScalar *vecPointer;
-    VecGetArray(petscSolVec, &vecPointer);
+    VecGetArray(petscSolVec, &vecPointer);    
 
     int c = 0;
     for (int i = 0; i < nComponents; i++) {
       DOFVector<double> &dv = *(vec.getDOFVector(i));
+
       DofMap& d = (*interiorMap)[dv.getFeSpace()].getMap();
       for (DofMap::iterator it = d.begin(); it != d.end(); ++it)
 	if (it->second.local != -1)
@@ -469,7 +499,7 @@ namespace AMDiS {
     VecDestroy(&tmp);
     t0 += MPI::Wtime() - wtime;
 
-    MSG("TIMEING: %.5f %.5f\n", t0, t1);
+    //    MSG("TIMEING: %.5f %.5f\n", t0, t1);
   }
 
 
@@ -765,7 +795,7 @@ namespace AMDiS {
 	  std::set<int>& perAsc = perMap.getAssociations(feSpace, globalRowDof);
 	  double value = *dofIt / (perAsc.size() + 1.0);
 	  VecSetValue(vecInterior, index, value, ADD_VALUES);
-	  
+
 	  for (std::set<int>::iterator perIt = perAsc.begin(); 
 	       perIt != perAsc.end(); ++perIt) {
 	    int mappedDof = perMap.map(feSpace, *perIt, globalRowDof);
@@ -786,6 +816,8 @@ namespace AMDiS {
   {
     FUNCNAME("PetscSolverGlobalMatrix::removeDirichletBcDofs()");
 
+    ERROR_EXIT("DO NOT CALL!\n");
+
     vector<int> dofsInterior, dofsCoarse;
 
     int nComponents = mat->getNumRows();
@@ -800,11 +832,15 @@ namespace AMDiS {
 	for (std::set<DegreeOfFreedom>::iterator it = dirichletDofs.begin();
 	     it != dirichletDofs.end(); ++it) {
 	  if (isCoarseSpace(feSpace, *it)) {
-	    if ((*coarseSpaceMap)[feSpace].isRankDof(*it))
-	      dofsCoarse.push_back(coarseSpaceMap->getMatIndex(i, *it));
+	    if ((*coarseSpaceMap)[feSpace].isRankDof(*it)) {
+	      int globalDof = (*coarseSpaceMap)[feSpace][*it].global;
+	      dofsCoarse.push_back(coarseSpaceMap->getMatIndex(i, globalDof));
+	    }
 	  } else {
-	    if ((*interiorMap)[feSpace].isRankDof(*it))
-	      dofsInterior.push_back(interiorMap->getMatIndex(i, *it));
+	    if ((*interiorMap)[feSpace].isRankDof(*it)) {
+	      int globalDof = (*interiorMap)[feSpace][*it].global;
+	      dofsInterior.push_back(interiorMap->getMatIndex(i, globalDof));
+	    }
 	  }
 	}
       } else {
@@ -825,6 +861,8 @@ namespace AMDiS {
   {
     FUNCNAME("PetscSolverGlobalMatrix::removeDirichletBcDofs()");
 
+    ERROR_EXIT("DO NOT CALL!\n");
+
     int nComponents = vec->getSize();
     for (int i = 0; i < nComponents; i++) {
       const FiniteElemSpace *feSpace = vec->getDOFVector(i)->getFeSpace();
@@ -832,32 +870,31 @@ namespace AMDiS {
       map<DegreeOfFreedom, double> &dirichletValues = 
 	vec->getDOFVector(i)->getDirichletValues();
 
-      MSG("DIRICHLET DOFS: %d -> %d\n", i, dirichletValues.size());
-
-      MSG("MAT IS GLOBAL: %d\n", interiorMap->isMatIndexFromGlobal());
-// 	if (interiorMap->isMatIndexFromGlobal())
-// 	  index = 
-// 	    interiorMap->getMatIndex(nRowVec, globalRowDof) + rStartInterior;
-// 	else
-// 	  index =
-// 	    interiorMap->getMatIndex(nRowVec, dofIt.getDOFIndex()) + rStartInterior;
-
-      
       for (map<DegreeOfFreedom, double>::iterator it = dirichletValues.begin();
 	   it != dirichletValues.end(); ++it) {
 	if (isCoarseSpace(feSpace, it->first)) {
-	  if ((*coarseSpaceMap)[feSpace].isRankDof(it->first))
-	    VecSetValue(rhsCoarseSpace, coarseSpaceMap->getMatIndex(i, it->first), 
+	  if ((*coarseSpaceMap)[feSpace].isRankDof(it->first)) {
+	    int globalDof = (*coarseSpaceMap)[feSpace][it->first].global;
+	    VecSetValue(rhsCoarseSpace, coarseSpaceMap->getMatIndex(i, globalDof), 
 			it->second, INSERT_VALUES);
+	  }
 	} else {
 	  if ((*interiorMap)[feSpace].isRankDof(it->first)) {
-	    MSG("REMOVE: %d %d %d\n", i, it->first, interiorMap->getMatIndex(i, it->first));
-	    VecSetValue(rhsInterior, interiorMap->getMatIndex(i, it->first),
+	    int globalDof = (*interiorMap)[feSpace][it->first].global;
+	    VecSetValue(rhsInterior, interiorMap->getMatIndex(i, globalDof),
 			it->second, INSERT_VALUES);	    
 	  }
 	}
       }
     }
+
+    VecAssemblyBegin(rhsInterior);
+    VecAssemblyEnd(rhsInterior);
+
+    if (coarseSpaceMap) {
+      VecAssemblyBegin(rhsCoarseSpace);
+      VecAssemblyEnd(rhsCoarseSpace);
+    }
   }