diff --git a/AMDiS/src/parallel/PetscProblemStat.cc b/AMDiS/src/parallel/PetscProblemStat.cc
index 0bde5bede490604dd104b21af00d637a14e260ff..6e03c86bca056e5812326ef8ba64880b3301f220 100644
--- a/AMDiS/src/parallel/PetscProblemStat.cc
+++ b/AMDiS/src/parallel/PetscProblemStat.cc
@@ -89,7 +89,6 @@ namespace AMDiS {
 
     petscSolver->fillPetscRhs(rhs);
 
-
 #if 0
     processMemUsage(vm, rss);   
     MSG("STAGE 2\n");
diff --git a/AMDiS/src/parallel/PetscSolverFeti.cc b/AMDiS/src/parallel/PetscSolverFeti.cc
index 7930f7254b2405a84045271b13d9e7e47de1be27..688fb21467c8fcc8cb81d5bc4de6819969a7daa6 100644
--- a/AMDiS/src/parallel/PetscSolverFeti.cc
+++ b/AMDiS/src/parallel/PetscSolverFeti.cc
@@ -18,6 +18,7 @@ namespace AMDiS {
 
   using namespace std;
 
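+  // Out-of-class definition of the static statistics object declared in
+  // PetscSolverFeti.h.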
+  FetiStatisticsData PetscSolverFeti::fetiStatistics;
 
 #ifdef HAVE_PETSC_DEV 
   // y = mat * x
@@ -25,17 +26,20 @@ namespace AMDiS {
   {
     // S_PiPi = K_PiPi - K_PiB inv(K_BB) K_BPi
 
+    double first = MPI::Wtime();
     void *ctx;
     MatShellGetContext(mat, &ctx);
     SchurPrimalData* data = static_cast<SchurPrimalData*>(ctx);
 
     MatMult(*(data->mat_b_primal), x, data->tmp_vec_b);
-    KSPSolve(*(data->ksp_b), data->tmp_vec_b, data->tmp_vec_b);
-
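+    // inv(K_BB) is applied by a purely local, per-rank solve, see
+    // PetscSolverFeti::solveLocalProblem().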
+    data->fetiSolver->solveLocalProblem(data->tmp_vec_b, data->tmp_vec_b);
     MatMult(*(data->mat_primal_b), data->tmp_vec_b, data->tmp_vec_primal);
     MatMult(*(data->mat_primal_primal), x, y);
     VecAXPBY(y, -1.0, 1.0, data->tmp_vec_primal);
 
+    PetscSolverFeti::fetiStatistics.nSchurPrimalApply++;
+    PetscSolverFeti::fetiStatistics.timeSchurPrimalApply += MPI::Wtime() - first;
+
     return 0;
   }
 
@@ -52,17 +56,25 @@ namespace AMDiS {
 
     // tmp_vec_b0 = inv(K_BB) trans(L) x
     MatMultTranspose(*(data->mat_lagrange), x, data->tmp_vec_b0);
-    KSPSolve(*(data->ksp_b), data->tmp_vec_b0, data->tmp_vec_b0);
+    data->fetiSolver->solveLocalProblem(data->tmp_vec_b0, data->tmp_vec_b0);
 
     // tmp_vec_b1 = inv(K_BB) K_BPi  inv(S_PiPi) K_PiB tmp_vec_b0
     MatMult(*(data->mat_primal_b), data->tmp_vec_b0, data->tmp_vec_primal);
+
+    double first = MPI::Wtime();
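+    // Coarse problem: each application of the FETI-DP operator requires one
+    // solve with inv(S_PiPi).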
     KSPSolve(*(data->ksp_schur_primal), data->tmp_vec_primal, data->tmp_vec_primal);
+    PetscSolverFeti::fetiStatistics.nSchurPrimalSolve++;
+    PetscSolverFeti::fetiStatistics.timeSchurPrimalSolve +=
+      MPI::Wtime() - first;
+
     MatMult(*(data->mat_b_primal), data->tmp_vec_primal, data->tmp_vec_b1);
 
     // y = L (tmp_vec_b0 + tmp_vec_b1)
     VecAXPBY(data->tmp_vec_b0, 1.0, 1.0, data->tmp_vec_b1);
     MatMult(*(data->mat_lagrange), data->tmp_vec_b0, y);
 
+    PetscSolverFeti::fetiStatistics.nFetiApply++;
+
     return 0;
   }
 
@@ -183,7 +195,8 @@ namespace AMDiS {
 
   PetscSolverFeti::PetscSolverFeti()
     : PetscSolver(),
-      nComponents(-1)
+      nComponents(-1),
+      schurPrimalSolver(0)
   {
     FUNCNAME("PetscSolverFeti::PetscSolverFeti()");
 
@@ -196,8 +209,14 @@ namespace AMDiS {
     } else if (preconditionerName == "lumped") {
       fetiPreconditioner = FETI_LUMPED;
     } else {
-      ERROR_EXIT("Preconditioner \"%s\" not available!\n", preconditionerName.c_str());
+      ERROR_EXIT("Preconditioner \"%s\" not available!\n", 
+		 preconditionerName.c_str());
     }
+
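+    // 0: solve the Schur primal system iteratively with a matrix-free operator,
+    // 1: create the Schur complement matrix explicitly and solve it directly
+    //    (see createSchurPrimal(), currently work in progress).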
+    Parameters::get("parallel->feti->schur primal solver", schurPrimalSolver);
+    TEST_EXIT(schurPrimalSolver == 0 || schurPrimalSolver == 1)
+      ("Wrong solver \"%d\"for the Schur primal complement!\n", 
+       schurPrimalSolver);
   }
 
 
@@ -492,7 +511,7 @@ namespace AMDiS {
   {
     FUNCNAME("PetscSolverFeti::createIndeB()");
 
-    globalIndexB.clear();
+    localIndexB.clear();
     DOFAdmin* admin = meshDistributor->getFeSpace()->getAdmin();
 
    // === To ensure that all interior nodes on each rank are listed first in ===
@@ -502,15 +521,15 @@ namespace AMDiS {
     for (int i = 0; i < admin->getUsedSize(); i++)
       if (admin->isDofFree(i) == false && primals.count(i) == 0)
 	if (duals.count(i) == 0 && primals.count(i) == 0)
-	  globalIndexB[i] = -1;
+	  localIndexB[i] = -1;
 
 
     // === Get correct index for all interior nodes. ===
 
     nLocalInterior = 0;
-    for (DofMapping::iterator it = globalIndexB.begin(); 
-	 it != globalIndexB.end(); ++it) {
-      it->second = nLocalInterior + rStartB;
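+    // Interior DOFs are now numbered rank-locally; the offset rStartB is added
+    // only where global PETSc indices are required.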
+    for (DofMapping::iterator it = localIndexB.begin(); 
+	 it != localIndexB.end(); ++it) {
+      it->second = nLocalInterior;
       nLocalInterior++;
     }
     nLocalDuals = duals.size();
@@ -524,7 +543,7 @@ namespace AMDiS {
 
     for (DofIndexSet::iterator it = duals.begin();
 	 it != duals.end(); ++it)
-      globalIndexB[*it] = globalDualIndex[*it];
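+      // Dual DOFs: convert the global dual index to a rank-local index.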
+      localIndexB[*it] = globalDualIndex[*it] - rStartB;
   }
 
 
@@ -590,31 +609,47 @@ namespace AMDiS {
   {
     FUNCNAME("PetscSolverFeti::createSchurPrimal()");
 
-    schurPrimalData.mat_primal_primal = &mat_primal_primal;
-    schurPrimalData.mat_primal_b = &mat_primal_b;
-    schurPrimalData.mat_b_primal = &mat_b_primal;
-    schurPrimalData.ksp_b = &ksp_b;
+    MSG("MAT SCHUR PRIMAL SOLVER = %d\n", schurPrimalSolver);
 
-    VecCreateMPI(PETSC_COMM_WORLD, 
-		 nRankB * nComponents, nOverallB * nComponents,
-		 &(schurPrimalData.tmp_vec_b));
-    VecCreateMPI(PETSC_COMM_WORLD, 
-		 nRankPrimals * nComponents, nOverallPrimals * nComponents,
-		 &(schurPrimalData.tmp_vec_primal));
+    if (schurPrimalSolver == 0) {
+      schurPrimalData.mat_primal_primal = &mat_primal_primal;
+      schurPrimalData.mat_primal_b = &mat_primal_b;
+      schurPrimalData.mat_b_primal = &mat_b_primal;
+      schurPrimalData.fetiSolver = this;
+      
+      VecCreateMPI(PETSC_COMM_WORLD, 
+		   nRankB * nComponents, nOverallB * nComponents,
+		   &(schurPrimalData.tmp_vec_b));
+      VecCreateMPI(PETSC_COMM_WORLD, 
+		   nRankPrimals * nComponents, nOverallPrimals * nComponents,
+		   &(schurPrimalData.tmp_vec_primal));
+
+      MatCreateShell(PETSC_COMM_WORLD,
+		     nRankPrimals * nComponents, nRankPrimals * nComponents,
+		     nOverallPrimals * nComponents, nOverallPrimals * nComponents,
+		     &schurPrimalData, 
+		     &mat_schur_primal);
+      MatShellSetOperation(mat_schur_primal, MATOP_MULT, 
+			   (void(*)(void))petscMultMatSchurPrimal);
+      
+      KSPCreate(PETSC_COMM_WORLD, &ksp_schur_primal);
+      KSPSetOperators(ksp_schur_primal, mat_schur_primal, mat_schur_primal, SAME_NONZERO_PATTERN);
+      KSPSetOptionsPrefix(ksp_schur_primal, "solver_sp_");
+      KSPSetFromOptions(ksp_schur_primal);
+    } else {
+      MatDuplicate(mat_primal_primal, MAT_COPY_VALUES, &mat_schur_primal);
 
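+      // NOTE: The explicit computation of S_PiPi = K_PiPi - K_PiB inv(K_BB) K_BPi
+      // is work in progress: MatMatSolve() requires mat_b_b to be factored and
+      // tmp0 to be a preallocated dense matrix. The run therefore aborts below.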
+      Mat tmp0, tmp1;
+      MatMatSolve(mat_b_b, mat_b_primal, tmp0);
+      MatMatMult(mat_primal_b, tmp0, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &tmp1);
 
-    MatCreateShell(PETSC_COMM_WORLD,
-		   nRankPrimals * nComponents, nRankPrimals * nComponents,
-		   nOverallPrimals * nComponents, nOverallPrimals * nComponents,
-		   &schurPrimalData, 
-		   &mat_schur_primal);
-    MatShellSetOperation(mat_schur_primal, MATOP_MULT, 
-			 (void(*)(void))petscMultMatSchurPrimal);
-
-    KSPCreate(PETSC_COMM_WORLD, &ksp_schur_primal);
-    KSPSetOperators(ksp_schur_primal, mat_schur_primal, mat_schur_primal, SAME_NONZERO_PATTERN);
-    KSPSetOptionsPrefix(ksp_schur_primal, "solver_sp_");
-    KSPSetFromOptions(ksp_schur_primal);
+      MatDestroy(&tmp0);
+      MatDestroy(&tmp1);
+
+      MSG("EXIT!\n");
+
+      exit(0);
+    }
   }
 
 
@@ -625,7 +660,7 @@ namespace AMDiS {
     schurPrimalData.mat_primal_primal = PETSC_NULL;
     schurPrimalData.mat_primal_b = PETSC_NULL;
     schurPrimalData.mat_b_primal = PETSC_NULL;
-    schurPrimalData.ksp_b = PETSC_NULL;
+    schurPrimalData.fetiSolver = NULL;
 
     VecDestroy(&schurPrimalData.tmp_vec_b);
     VecDestroy(&schurPrimalData.tmp_vec_primal);
@@ -645,7 +680,7 @@ namespace AMDiS {
     fetiData.mat_primal_b = &mat_primal_b;
     fetiData.mat_b_primal = &mat_b_primal;
     fetiData.mat_lagrange = &mat_lagrange;
-    fetiData.ksp_b = &ksp_b;
+    fetiData.fetiSolver = this;
     fetiData.ksp_schur_primal = &ksp_schur_primal;
 
     VecCreateMPI(PETSC_COMM_WORLD, 
@@ -681,7 +716,8 @@ namespace AMDiS {
     switch (fetiPreconditioner) {
     case FETI_DIRICHLET:           
       KSPCreate(PETSC_COMM_SELF, &ksp_interior);
-      KSPSetOperators(ksp_interior, mat_interior_interior, mat_interior_interior, SAME_NONZERO_PATTERN);
+      KSPSetOperators(ksp_interior, mat_interior_interior, mat_interior_interior, 
+		      SAME_NONZERO_PATTERN);
       KSPSetOptionsPrefix(ksp_interior, "solver_interior_");
       KSPSetFromOptions(ksp_interior);
             
@@ -736,7 +772,7 @@ namespace AMDiS {
     fetiData.mat_primal_b = PETSC_NULL;
     fetiData.mat_b_primal = PETSC_NULL;
     fetiData.mat_lagrange = PETSC_NULL;
-    fetiData.ksp_b = PETSC_NULL;
+    fetiData.fetiSolver = NULL;
     fetiData.ksp_schur_primal = PETSC_NULL;
 
     VecDestroy(&fetiData.tmp_vec_b0);
@@ -844,9 +880,9 @@ namespace AMDiS {
     for (int i = 0; i < nComponents; i++) {
       DOFVector<double>& dofVec = *(vec.getDOFVector(i));
 
-      for (DofMapping::iterator it = globalIndexB.begin();
-	   it != globalIndexB.end(); ++it) {
-	int petscIndex = (it->second - rStartB) * nComponents + i;
+      for (DofMapping::iterator it = localIndexB.begin();
+	   it != localIndexB.end(); ++it) {
+	int petscIndex = it->second * nComponents + i;
 	dofVec[it->first] = localSolB[petscIndex];
       }
 
@@ -886,9 +922,12 @@ namespace AMDiS {
     int nRowsInterior = nLocalInterior * nComponents;
     int nRowsDual = nLocalDuals * nComponents;
 
-    MatCreateMPIAIJ(PETSC_COMM_WORLD,
-		    nRowsRankB, nRowsRankB, nRowsOverallB, nRowsOverallB,
-		    30, PETSC_NULL, 0, PETSC_NULL, &mat_b_b);
+
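+    // K_BB is now stored as a sequential matrix on each rank; all couplings
+    // between ranks go through the primal variables.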
+    MatCreateSeqAIJ(PETSC_COMM_SELF, nRowsRankB, nRowsRankB, 30, PETSC_NULL,
+		    &mat_b_b);
 
     MatCreateMPIAIJ(PETSC_COMM_WORLD,
 		    nRowsRankPrimal, nRowsRankPrimal, 
@@ -1004,10 +1043,11 @@ namespace AMDiS {
 	    } else {
 	      // Column is not a primal variable.
 
-	      TEST_EXIT_DBG(globalIndexB.count(col(*icursor)))
+	      TEST_EXIT_DBG(localIndexB.count(col(*icursor)))
 		("No global B index for DOF %d!\n", col(*icursor));
 	      
-	      int colIndex = globalIndexB[col(*icursor)] * nComponents + j;
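+	      // Reconstruct the global PETSc column index from the rank-local one.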
+	      int colIndex = 
+		(localIndexB[col(*icursor)] + rStartB) * nComponents + j;
 
 	      if (rowPrimal) {
 		colsOther.push_back(colIndex);
@@ -1023,33 +1063,31 @@ namespace AMDiS {
 	    // === For preconditioner ===
 
 	    if (!rowPrimal && !colPrimal) {
-	      int rowIndex = globalIndexB[*cursor] - rStartB;
-	      int colIndex = globalIndexB[col(*icursor)] - rStartB;
+	      int rowIndex = localIndexB[*cursor];
+	      int colIndex = localIndexB[col(*icursor)];
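+	      // Local indices < nLocalInterior address the interior block, all
+	      // others the dual block of the preconditioner.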
 		
 	      if (rowIndex < nLocalInterior) {
 		if (colIndex < nLocalInterior) {
-		  int colIndex2 = 
-		    (globalIndexB[col(*icursor)] - rStartB) * nComponents + j;
+		  int colIndex2 = localIndexB[col(*icursor)] * nComponents + j;
 
 		  colsLocal.push_back(colIndex2);
 		  valuesLocal.push_back(value(*icursor));
 		} else {
-		  int colIndex2 = 
-		    (globalIndexB[col(*icursor)] - rStartB - nLocalInterior) * nComponents + j;
+		  int colIndex2 = (localIndexB[col(*icursor)] - nLocalInterior) *
+		    nComponents + j;
 
 		  colsLocalOther.push_back(colIndex2);
 		  valuesLocalOther.push_back(value(*icursor));
 		}
 	      } else {
 		if (colIndex < nLocalInterior) {
-		  int colIndex2 = 
-		    (globalIndexB[col(*icursor)] - rStartB) * nComponents + j;
+		  int colIndex2 = localIndexB[col(*icursor)] * nComponents + j;
 
 		  colsLocalOther.push_back(colIndex2);
 		  valuesLocalOther.push_back(value(*icursor));
 		} else {
-		  int colIndex2 = 
-		    (globalIndexB[col(*icursor)] - rStartB - nLocalInterior) * nComponents + j;
+		  int colIndex2 = (localIndexB[col(*icursor)] - nLocalInterior) *
+		    nComponents + j;
 
 		  colsLocal.push_back(colIndex2);
 		  valuesLocal.push_back(value(*icursor));
@@ -1072,10 +1110,13 @@ namespace AMDiS {
 	      MatSetValues(mat_primal_b, 1, &rowIndex, colsOther.size(),
 			   &(colsOther[0]), &(valuesOther[0]), ADD_VALUES);
 	  } else {
-	    TEST_EXIT_DBG(globalIndexB.count(*cursor))
+	    TEST_EXIT_DBG(localIndexB.count(*cursor))
 	      ("Should not happen!\n");
 
-	    int rowIndex = globalIndexB[*cursor] * nComponents + i;
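+	    // mat_b_b is sequential, so row and column indices must be rank-local.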
+	    int rowIndex = localIndexB[*cursor] * nComponents + i;
+	    for (unsigned int k = 0; k < cols.size(); k++)
+	      cols[k] -= rStartB * nComponents;
 	    MatSetValues(mat_b_b, 1, &rowIndex, cols.size(),
 			 &(cols[0]), &(values[0]), ADD_VALUES);
 
@@ -1090,11 +1131,10 @@ namespace AMDiS {
 	    switch (fetiPreconditioner) {
 	    case FETI_DIRICHLET:
 	      {
-		int rowIndex = globalIndexB[*cursor] - rStartB;
+		int rowIndex = localIndexB[*cursor];
 		
 		if (rowIndex < nLocalInterior) {
-		  int rowIndex2 = 
-		    (globalIndexB[*cursor] - rStartB) * nComponents + i;
+		  int rowIndex2 = localIndexB[*cursor] * nComponents + i;
 		  
 		  MatSetValues(mat_interior_interior, 1, &rowIndex2, colsLocal.size(),
 			       &(colsLocal[0]), &(valuesLocal[0]), INSERT_VALUES);
@@ -1104,7 +1144,7 @@ namespace AMDiS {
 				 &(colsLocalOther[0]), &(valuesLocalOther[0]), INSERT_VALUES);
 		} else {
 		  int rowIndex2 = 
-		    (globalIndexB[*cursor] - rStartB - nLocalInterior) * nComponents + i;
+		    (localIndexB[*cursor] - nLocalInterior) * nComponents + i;
 		  
 		  MatSetValues(mat_duals_duals, 1, &rowIndex2, colsLocal.size(),
 			       &(colsLocal[0]), &(valuesLocal[0]), INSERT_VALUES);
@@ -1119,11 +1159,11 @@ namespace AMDiS {
 
 	    case FETI_LUMPED:
 	      {
-		int rowIndex = globalIndexB[*cursor] - rStartB;
+		int rowIndex = localIndexB[*cursor];
 		
 		if (rowIndex >= nLocalInterior) {
 		  int rowIndex2 = 
-		    (globalIndexB[*cursor] - rStartB - nLocalInterior) * nComponents + i;
+		    (localIndexB[*cursor] - nLocalInterior) * nComponents + i;
 		  
 		  MatSetValues(mat_duals_duals, 1, &rowIndex2, colsLocal.size(),
 			       &(colsLocal[0]), &(valuesLocal[0]), INSERT_VALUES);		
@@ -1186,7 +1226,7 @@ namespace AMDiS {
 
     // === Create solver for the non primal (thus local) variables. ===
 
-    KSPCreate(PETSC_COMM_WORLD, &ksp_b);
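+    // K_BB is block diagonal with respect to the ranks, thus the solver is
+    // purely local.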
+    KSPCreate(PETSC_COMM_SELF, &ksp_b);
     KSPSetOperators(ksp_b, mat_b_b, mat_b_b, SAME_NONZERO_PATTERN);
     KSPSetOptionsPrefix(ksp_b, "solver_b_");
     KSPSetFromOptions(ksp_b);
@@ -1200,7 +1240,8 @@ namespace AMDiS {
     int nComponents = vec->getSize();
 
     VecCreateMPI(PETSC_COMM_WORLD, 
-		 nRankB * nComponents, nOverallB * nComponents, &f_b);
+		 nRankB * nComponents, 
+		 nOverallB * nComponents, &f_b);
     VecCreateMPI(PETSC_COMM_WORLD, 
 		 nRankPrimals * nComponents, 
 		 nOverallPrimals * nComponents, &f_primal);
@@ -1217,12 +1258,11 @@ namespace AMDiS {
 	  double value = *dofIt;
 	  VecSetValues(f_primal, 1, &index, &value, ADD_VALUES);
 	} else {
-	  TEST_EXIT_DBG(globalIndexB.count(index))
+	  TEST_EXIT_DBG(localIndexB.count(index))
 	    ("Should not happen!\n");
 
-	  index = globalIndexB[index] * nComponents + i;
-	  double value = *dofIt;
-	  VecSetValues(f_b, 1, &index, &value, ADD_VALUES);
+	  index = (localIndexB[index] + rStartB) * nComponents + i;
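+	  // *dofIt is an already assembled value, so it is inserted rather
+	  // than accumulated.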
+	  VecSetValue(f_b, index, *dofIt, INSERT_VALUES);
 	}      
       }
     }
@@ -1234,134 +1274,38 @@ namespace AMDiS {
     VecAssemblyEnd(f_primal);
   }
 
-
-  void PetscSolverFeti::solveFetiMatrix(SystemVector &vec)
-  {
-    FUNCNAME("PetscSolverFeti::solveFetiMatrix()");
-
-    Mat  mat_lagrange_transpose;
-    MatTranspose(mat_lagrange, MAT_INITIAL_MATRIX, &mat_lagrange_transpose);
-
-    // === Create nested matrix which will contain the overall FETI system. ===
-
-    Mat A;
-    Mat nestedA[3][3];
-    nestedA[0][0] = mat_b_b;
-    nestedA[0][1] = mat_b_primal;
-    nestedA[0][2] = mat_lagrange_transpose;
-    nestedA[1][0] = mat_primal_b;
-    nestedA[1][1] = mat_primal_primal;
-    nestedA[1][2] = PETSC_NULL;
-    nestedA[2][0] = mat_lagrange;
-    nestedA[2][1] = PETSC_NULL;
-    nestedA[2][2] = PETSC_NULL;
-
-    MatCreateNest(PETSC_COMM_WORLD, 3, PETSC_NULL, 3, PETSC_NULL, &(nestedA[0][0]), &A);
-
-    MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
-    MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
   
+  void PetscSolverFeti::solveLocalProblem(Vec &rhs, Vec &sol)
+  {
+    FUNCNAME("PetscSolverFeti::solveLocalProblem()");
 
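+    // Copy the rank-local part of the parallel rhs vector into a sequential
+    // work vector, solve with the local K_BB solver and copy the result back.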
+    Vec tmp;
+    VecCreateSeq(PETSC_COMM_SELF, nRankB * nComponents, &tmp);
+    PetscScalar *tmpValues;
+    VecGetArray(tmp, &tmpValues);
 
-    int nRankNest = (nRankB + nRankPrimals + nRankLagrange) * nComponents;
-    int nOverallNest = (nOverallB + nOverallPrimals + nOverallLagrange) * nComponents;
-    int rStartNest = (rStartB + rStartPrimals + rStartLagrange) * nComponents;
-
-    {
-      // === Test some matrix sizes. ===
-
-      int matRow, matCol;
-      MatGetLocalSize(A, &matRow, &matCol);
-      TEST_EXIT_DBG(matRow == nRankNest)("Should not happen!\n");
-      mpi::globalAdd(matRow);
-      TEST_EXIT_DBG(matRow == nOverallNest)("Should not happen!\n");
-
-      MatGetOwnershipRange(A, &matRow, &matCol);
-      TEST_EXIT_DBG(matRow == rStartNest)("Should not happen!\n");
-    }
-
-    // === Create rhs and solution vectors for the overall FETI system. ===
-
-    Vec f, b;
-    VecCreateMPI(PETSC_COMM_WORLD, nRankNest, nOverallNest, &f);
-    VecDuplicate(f, &b);
-
-    
-    // === Fill rhs vector by coping the primal and non primal PETSc vectors. ===
-
-    PetscScalar *local_f_b;
-    VecGetArray(f_b, &local_f_b);
-
-    PetscScalar *local_f_primal;
-    VecGetArray(f_primal, &local_f_primal);
-
-    {
-      int size;
-      VecGetLocalSize(f_b, &size);
-      TEST_EXIT_DBG(size == nRankB * nComponents)("Should not happen!\n");
-      VecGetLocalSize(f_primal, &size);
-      TEST_EXIT_DBG(size == nRankPrimals * nComponents)("Should not happen!\n");
-    }
-
-    PetscScalar *local_f;
-    VecGetArray(f, &local_f);
+    PetscScalar *rhsValues;
+    VecGetArray(rhs, &rhsValues);
 
-    int index = 0;
     for (int i = 0; i < nRankB * nComponents; i++)
-      local_f[index++] = local_f_b[i];
-    for (int i = 0; i < nRankPrimals * nComponents; i++)
-      local_f[index++] = local_f_primal[i];
+      tmpValues[i] = rhsValues[i];
 
-    VecRestoreArray(f, &local_f);  
-    VecRestoreArray(f_b, &local_f_b);
-    VecRestoreArray(f_primal, &local_f_primal);
-
-    
-    // === Create solver and solve the overall FETI system. ===
-
-    KSP ksp;
-    KSPCreate(PETSC_COMM_WORLD, &ksp);
-    KSPSetOperators(ksp, A, A, SAME_NONZERO_PATTERN);
-    KSPSetFromOptions(ksp);
+    VecRestoreArray(rhs, &rhsValues);
+    VecRestoreArray(tmp, &tmpValues);
 
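+    // Update the local solve statistics; without this, nLocalSolve and
+    // timeLocalSolve would always be reported as zero.
+    double first = MPI::Wtime();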
+    KSPSolve(ksp_b, tmp, tmp);
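+    fetiStatistics.nLocalSolve++;
+    fetiStatistics.timeLocalSolve += MPI::Wtime() - first;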
 
-    KSPSolve(ksp, f, b);
+    VecGetArray(tmp, &tmpValues);
+    PetscScalar *solValues;
+    VecGetArray(sol, &solValues);
 
+    for (int i = 0; i < nRankB * nComponents; i++) 
+      solValues[i] = tmpValues[i];
 
-    // === Reconstruct FETI solution vectors. ===
-    
-    Vec u_b, u_primal;
-    VecDuplicate(f_b, &u_b);
-    VecDuplicate(f_primal, &u_primal);
-    
+    VecRestoreArray(sol, &solValues);
+    VecRestoreArray(tmp, &tmpValues);
 
-    PetscScalar *local_b;
-    VecGetArray(b, &local_b);
-
-    PetscScalar *local_u_b;
-    VecGetArray(u_b, &local_u_b);
-
-    PetscScalar *local_u_primal;
-    VecGetArray(u_primal, &local_u_primal);
-
-    index = 0;
-    for (int i = 0; i < nRankB * nComponents; i++)
-      local_u_b[i] = local_b[index++];
-    for (int i = 0; i < nRankPrimals * nComponents; i++)
-      local_u_primal[i] = local_b[index++];
-
-    VecRestoreArray(f, &local_b);
-    VecRestoreArray(u_b, &local_u_b);
-    VecRestoreArray(u_primal, &local_u_primal);
-
-    recoverSolution(u_b, u_primal, vec);
-
-    VecDestroy(&u_b);
-    VecDestroy(&u_primal);
-    VecDestroy(&b);
-    VecDestroy(&f);
-
-    KSPDestroy(&ksp);
+    VecDestroy(&tmp);
   }
 
 
@@ -1376,8 +1320,8 @@ namespace AMDiS {
     Vec tmp_b0, tmp_b1, tmp_lagrange0, tmp_primal0, tmp_primal1;
     MatGetVecs(mat_lagrange, PETSC_NULL, &tmp_lagrange0);
     MatGetVecs(mat_lagrange, PETSC_NULL, &vec_rhs);
-    MatGetVecs(mat_b_b, PETSC_NULL, &tmp_b0);
-    MatGetVecs(mat_b_b, PETSC_NULL, &tmp_b1);
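+    // mat_b_b is now sequential, thus the parallel work vectors are duplicated
+    // from f_b instead.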
+    VecDuplicate(f_b, &tmp_b0);
+    VecDuplicate(f_b, &tmp_b1);
     MatGetVecs(mat_primal_primal, PETSC_NULL, &tmp_primal0);
     MatGetVecs(mat_primal_primal, PETSC_NULL, &tmp_primal1);
 
@@ -1387,7 +1331,8 @@ namespace AMDiS {
     //    d = L inv(K_BB) f_B - L inv(K_BB) K_BPi inv(S_PiPi) [f_Pi - K_PiB inv(K_BB) f_B]
 
     // vec_rhs = L * inv(K_BB) * f_B
-    KSPSolve(ksp_b, f_b, tmp_b0);
+    solveLocalProblem(f_b, tmp_b0);
+
     MatMult(mat_lagrange, tmp_b0, vec_rhs);
 
     // tmp_primal0 = M_PiB * inv(K_BB) * f_B
@@ -1397,11 +1342,15 @@ namespace AMDiS {
     VecAXPBY(tmp_primal0, 1.0, -1.0, f_primal);
 
     // tmp_primal0 = inv(S_PiPi) (f_Pi - M_PiB * inv(K_BB) * f_B)
+    double first = MPI::Wtime();
     KSPSolve(ksp_schur_primal, tmp_primal0, tmp_primal0);
+    PetscSolverFeti::fetiStatistics.nSchurPrimalSolve++;
+    PetscSolverFeti::fetiStatistics.timeSchurPrimalSolve +=
+      MPI::Wtime() - first;
 
     //
     MatMult(mat_b_primal, tmp_primal0, tmp_b0);
-    KSPSolve(ksp_b, tmp_b0, tmp_b0);
+    solveLocalProblem(tmp_b0, tmp_b0);
     MatMult(mat_lagrange, tmp_b0, tmp_lagrange0);
 
     //
@@ -1410,23 +1359,26 @@ namespace AMDiS {
 
     // === Solve with FETI-DP operator. ===
 
+    first = MPI::Wtime();
     KSPSolve(ksp_feti, vec_rhs, vec_rhs);
+    fetiStatistics.timeFetiApply = MPI::Wtime() - first;
    
     // === Solve for u_primals. ===
 
     VecCopy(f_primal, tmp_primal0);
-
-    KSPSolve(ksp_b, f_b, tmp_b0);
+    solveLocalProblem(f_b, tmp_b0);
     MatMult(mat_primal_b, tmp_b0, tmp_primal1);
-
     VecAXPBY(tmp_primal0, -1.0, 1.0, tmp_primal1);
-
     MatMultTranspose(mat_lagrange, vec_rhs, tmp_b0);
-    KSPSolve(ksp_b, tmp_b0, tmp_b0);
+    solveLocalProblem(tmp_b0, tmp_b0);
     MatMult(mat_primal_b, tmp_b0, tmp_primal1);
 
     VecAXPBY(tmp_primal0, 1.0, 1.0, tmp_primal1);
+    first = MPI::Wtime();
     KSPSolve(ksp_schur_primal, tmp_primal0, tmp_primal0);
+    PetscSolverFeti::fetiStatistics.nSchurPrimalSolve++;
+    PetscSolverFeti::fetiStatistics.timeSchurPrimalSolve +=
+      MPI::Wtime() - first;
 
     
     // === Solve for u_b. ===
@@ -1437,8 +1389,7 @@ namespace AMDiS {
 
     MatMult(mat_b_primal, tmp_primal0, tmp_b1);
     VecAXPBY(tmp_b0, -1.0, 1.0, tmp_b1);
-
-    KSPSolve(ksp_b, tmp_b0, tmp_b0);
+    solveLocalProblem(tmp_b0, tmp_b0);
 
     // === And recover AMDiS solution vectors. ===
     
@@ -1487,6 +1438,7 @@ namespace AMDiS {
     KSPDestroy(&ksp_b);
   }
 
+
   void PetscSolverFeti::solvePetscMatrix(SystemVector &vec, AdaptInfo *adaptInfo)
   {
     FUNCNAME("PetscSolverFeti::solvePetscMatrix()");
@@ -1494,18 +1446,42 @@ namespace AMDiS {
-    int debug = 0;
-    Parameters::get("parallel->debug feti", debug);
 
-    if (debug) {
-      WARNING("FETI matrix is solved globally, thus without reducing to the lagrange multipliers!\n");
-
-      solveFetiMatrix(vec);
-    } else {
-      solveReducedFetiMatrix(vec);
-    } 
+    resetStatistics();
+    solveReducedFetiMatrix(vec);
+    printStatistics();
 
     MeshDistributor::globalMeshDistributor->synchVector(vec);
   }
 
 
+  void PetscSolverFeti::resetStatistics()
+  {
+    fetiStatistics.nFetiApply = 0;
+    fetiStatistics.timeFetiApply = 0.0;
+    fetiStatistics.nSchurPrimalApply = 0;
+    fetiStatistics.timeSchurPrimalApply = 0.0;
+    fetiStatistics.nSchurPrimalSolve = 0;
+    fetiStatistics.timeSchurPrimalSolve = 0.0;
+    fetiStatistics.nLocalSolve = 0;
+    fetiStatistics.timeLocalSolve = 0.0;
+  }
+
+
+  void PetscSolverFeti::printStatistics()
+  {
+    MSG("PetscSolverFeti::printStatistics()");
+
+    if (MPI::COMM_WORLD.Get_rank() == 0)
+      MSG("FETI-STATISTICS: %d %f %d %f %d %f %d %f\n",
+	  fetiStatistics.nFetiApply,
+	  fetiStatistics.timeFetiApply,
+	  fetiStatistics.nSchurPrimalApply,
+	  fetiStatistics.timeSchurPrimalApply,
+	  fetiStatistics.nSchurPrimalSolve,
+	  fetiStatistics.timeSchurPrimalSolve,
+	  fetiStatistics.nLocalSolve,
+	  fetiStatistics.timeLocalSolve);	  
+  }
 #endif
 
 }
diff --git a/AMDiS/src/parallel/PetscSolverFeti.h b/AMDiS/src/parallel/PetscSolverFeti.h
index 6607b79cef06560eb5bf00e23da9a550096432b3..e9feae304362908a6f6dc8f8b2c4186ab82110c7 100644
--- a/AMDiS/src/parallel/PetscSolverFeti.h
+++ b/AMDiS/src/parallel/PetscSolverFeti.h
@@ -33,6 +33,8 @@ namespace AMDiS {
 
 #ifdef HAVE_PETSC_DEV
 
+  class PetscSolverFeti;
+
   /** \brief
    * This structure is used when defining the MatShell operation for solving 
    * primal schur complement. \ref petscMultMatSchurPrimal
@@ -50,8 +52,7 @@ namespace AMDiS {
    /// Temporary vector in the primal variables.
     Vec tmp_vec_primal;
 
-    /// Pointer to the solver for \ref PetscSolverFeti::mat_bb.
-    KSP *ksp_b;
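+    /// Pointer to the FETI-DP solver; provides
+    /// \ref PetscSolverFeti::solveLocalProblem to apply inv(K_BB).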
+    PetscSolverFeti *fetiSolver;
   };
 
 
@@ -76,8 +77,7 @@ namespace AMDiS {
    /// Temporary vector on the primal variables.
     Vec tmp_vec_primal;
 
-    /// Pointer to the solver for \ref PetscSolverFeti::mat_bb.
-    KSP *ksp_b;
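+    /// Pointer to the FETI-DP solver; provides
+    /// \ref PetscSolverFeti::solveLocalProblem to apply inv(K_BB).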
+    PetscSolverFeti *fetiSolver;
 
     /// Pointer to the solver of the schur complement on the primal variables.
     KSP *ksp_schur_primal;
@@ -125,6 +125,34 @@ namespace AMDiS {
   } FetiPreconditioner;
 
 
+  struct FetiStatisticsData
+  {
+    /// Number of applications of the FETI-DP operator.
+    int nFetiApply;
+
+    /// Time for solving the reduced FETI system.
+    double timeFetiApply;
+
+    /// Number of applications of the Schur primal operator.
+    int nSchurPrimalApply;
+
+    /// Time for applying the Schur primal operator.
+    double timeSchurPrimalApply;
+
+    /// Number of solves of the Schur primal system.
+    int nSchurPrimalSolve;
+
+    /// Time for solving the Schur primal system.
+    double timeSchurPrimalSolve;
+
+    /// Number of solves of the local subdomain problems.
+    int nLocalSolve;
+
+    /// Time for solving the local subdomain problems.
+    double timeLocalSolve;
+  };
+
+
   /** \brief
    * FETI-DP implementation based on PETSc.
    */
@@ -156,6 +184,8 @@ namespace AMDiS {
 	MeshDistributor::BOUNDARY_FILL_INFO_RECV_DOFS;
     }
 
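+    /// Solves the local subdomain problem K_BB sol = rhs with the sequential
+    /// solver \ref ksp_b. rhs and sol may be the same vector, since an
+    /// internal temporary vector is used.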
+    void solveLocalProblem(Vec &rhs, Vec &sol);
+
   protected:
     /// After mesh changes, or if the solver is called the first time, this
    /// function creates all matrix and vector objects with the appropriate
@@ -212,15 +242,6 @@ namespace AMDiS {
 			 Vec &vec_sol_primal,
 			 SystemVector &vec);
 
-    /** \brief
-     * Solves the FETI-DP system globally, thus without reducing it to the 
-     * Lagrange multipliers. This should be used for debugging only to test
-     * if the FETI-DP system is setup correctly.
-     *
-     * \param[out]  vec    Solution DOF vectors.
-     */
-    void solveFetiMatrix(SystemVector &vec);
-
     /** \brief
     * Solves the FETI-DP system by first reducing it to the Lagrange
     * multipliers, as is standard for the FETI-DP method.
@@ -229,6 +250,10 @@ namespace AMDiS {
      */
     void solveReducedFetiMatrix(SystemVector &vec);
 
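+    /// Resets all counters and timers in \ref fetiStatistics to zero.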
+    void resetStatistics();
+
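+    /// Prints the collected FETI statistics; output is written by rank 0 only.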
+    void printStatistics();
+
   protected:
     /// Number of components in the PDE to be solved.
     int nComponents;
@@ -262,7 +287,7 @@ namespace AMDiS {
     
    /// Mapping from each non primal (B) variable to its global index
    /// (globalIndexB) and to its rank-local index (localIndexB).
-    DofMapping globalIndexB;
+    DofMapping globalIndexB, localIndexB;
 
     /// Number of non primal, thus B, variables on rank and globally.
     int nRankB, nOverallB, rStartB;    
@@ -287,6 +312,11 @@ namespace AMDiS {
     /// variables, \ref mat_b_b
     KSP ksp_b;
 
+    /// 0: Solve the Schur complement on primal variables with iterative solver.
+    /// 1: Create the Schur complement matrix explicitly and solve it with a
+    ///    direct solver.
+    int schurPrimalSolver;
+
     /// PETSc solver object to solve the Schur complement on the 
     /// primal variables.
     KSP ksp_schur_primal;
@@ -329,6 +359,9 @@ namespace AMDiS {
     
     // Number of local nodes that are duals.
     int nLocalDuals;
+
+  public:
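+    /// Runtime statistics of the FETI-DP solver. Static and public, so that
+    /// the free MatShell callback functions can update it.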
+    static FetiStatisticsData fetiStatistics;
   };
 #endif