diff --git a/AMDiS/src/parallel/PetscSolverFeti.cc b/AMDiS/src/parallel/PetscSolverFeti.cc
index 0fd538d6378213d35e364d57771fa13a17fa5083..9d4b86ec332d669e487fb05a97491783ed8bd273 100644
--- a/AMDiS/src/parallel/PetscSolverFeti.cc
+++ b/AMDiS/src/parallel/PetscSolverFeti.cc
@@ -189,10 +189,11 @@ namespace AMDiS {
     : PetscSolver(),
       schurPrimalSolver(0),
       multiLevelTest(false),
-      subDomainSolver(NULL),
+      subdomain(NULL),
       meshLevel(0),
       rStartInterior(0),
-      nGlobalOverallInterior(0)
+      nGlobalOverallInterior(0),
+      printTimings(false)
   {
     FUNCNAME("PetscSolverFeti::PetscSolverFeti()");
 
@@ -220,6 +221,8 @@ namespace AMDiS {
     Parameters::get("parallel->multi level test", multiLevelTest);
     if (multiLevelTest)
       meshLevel = 1;
+
+    Parameters::get("parallel->print timings", printTimings);
   }
 
 
@@ -232,17 +235,17 @@ namespace AMDiS {
 
     MeshLevelData& levelData = meshDistributor->getMeshLevelData();
 
-    if (subDomainSolver == NULL) {
-      subDomainSolver = new PetscSolverGlobalMatrix();
+    if (subdomain == NULL) {
+      subdomain = new PetscSolverGlobalMatrix();
 
       if (meshLevel == 0) {
-	subDomainSolver->setMeshDistributor(meshDistributor, 
+	subdomain->setMeshDistributor(meshDistributor, 
 					    mpiCommGlobal, mpiCommLocal);
       } else {
-	subDomainSolver->setMeshDistributor(meshDistributor, 
+	subdomain->setMeshDistributor(meshDistributor, 
 					    levelData.getMpiComm(meshLevel - 1),
 					    levelData.getMpiComm(meshLevel));
-	subDomainSolver->setLevel(meshLevel);
+	subdomain->setLevel(meshLevel);
       }
     }
 
@@ -289,6 +292,8 @@ namespace AMDiS {
   {
     FUNCNAME("PetscSolverFeti::createFetiData()");
 
+    double timeCounter = MPI::Wtime();
+
     TEST_EXIT(meshDistributor)("No mesh distributor object defined!\n");
     TEST_EXIT(meshDistributor->getFeSpaces().size() > 0)
       ("No FE space defined in mesh distributor!\n");
@@ -384,7 +389,13 @@ namespace AMDiS {
     }
 
     // If multi level test, inform sub domain solver about coarse space.
-    subDomainSolver->setDofMapping(&localDofMap, &primalDofMap);
+    subdomain->setDofMapping(&localDofMap, &primalDofMap);
+
+    if (printTimings) {
+      timeCounter = MPI::Wtime() - timeCounter;
+      MSG("FETI-DP timing 01: %.5f seconds (creation of basic data structures)", 
+	  timeCounter);
+    }
   }
 
 
@@ -644,6 +655,8 @@ namespace AMDiS {
   {
     FUNCNAME("PetscSolverFeti::createMatLagrange()");
 
+    double wtime = MPI::Wtime();
+
     // === Create distributed matrix for Lagrange constraints. ===
 
     MatCreateMPIAIJ(mpiCommGlobal,
@@ -694,6 +707,10 @@ namespace AMDiS {
 
     MatAssemblyBegin(mat_lagrange, MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(mat_lagrange, MAT_FINAL_ASSEMBLY);
+
+    if (printTimings)
+      MSG("FETI-DP timing 05: %.5f seconds (creation of lagrange constraint matrix)\n", 
+	  MPI::Wtime() - wtime);
   }
 
 
@@ -704,7 +721,7 @@ namespace AMDiS {
     if (schurPrimalSolver == 0) {
       MSG("Create iterative schur primal solver!\n");
 
-      schurPrimalData.subSolver = subDomainSolver;
+      schurPrimalData.subSolver = subdomain;
       
       VecCreateMPI(mpiCommGlobal, 
 		   localDofMap.getRankDofs(), 
@@ -757,13 +774,13 @@ namespace AMDiS {
 
       for (int i = 0; i < nRowsRankB; i++) {
 	PetscInt row = localDofMap.getStartDofs() + i;
-	MatGetRow(subDomainSolver->getMatIntCoarse(), row, &nCols, &cols, &values);
+	MatGetRow(subdomain->getMatIntCoarse(), row, &nCols, &cols, &values);
 
 	for (int j = 0; j < nCols; j++)
 	  if (values[j] != 0.0)
 	    mat_b_primal_cols[cols[j]].push_back(make_pair(i, values[j]));
 
-	MatRestoreRow(subDomainSolver->getMatIntCoarse(), row, &nCols, &cols, &values);
+	MatRestoreRow(subdomain->getMatIntCoarse(), row, &nCols, &cols, &values);
       }
 
       TEST_EXIT(static_cast<int>(mat_b_primal_cols.size()) == 
@@ -782,7 +799,7 @@ namespace AMDiS {
 	VecAssemblyBegin(tmpVec);
 	VecAssemblyEnd(tmpVec);
 
-	subDomainSolver->solve(tmpVec, tmpVec);
+	subdomain->solve(tmpVec, tmpVec);
 
 	PetscScalar *tmpValues;
 	VecGetArray(tmpVec, &tmpValues);
@@ -800,17 +817,13 @@ namespace AMDiS {
       MatAssemblyBegin(matBPi, MAT_FINAL_ASSEMBLY);
       MatAssemblyEnd(matBPi, MAT_FINAL_ASSEMBLY);
 
-      MatDuplicate(subDomainSolver->getMatCoarseCoarse(), MAT_COPY_VALUES, &mat_schur_primal);
-      MatMatMult(subDomainSolver->getMatCoarseInt(), matBPi, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &matPrimal);
+      MatDuplicate(subdomain->getMatCoarseCoarse(), MAT_COPY_VALUES, &mat_schur_primal);
+      MatMatMult(subdomain->getMatCoarseInt(), matBPi, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &matPrimal);
       MatAXPY(mat_schur_primal, -1.0, matPrimal, DIFFERENT_NONZERO_PATTERN);
 
       MatDestroy(&matPrimal);
       MatDestroy(&matBPi);
 
-      MatInfo minfo;
-      MatGetInfo(mat_schur_primal, MAT_GLOBAL_SUM, &minfo);
-      MSG("Schur primal matrix nnz = %f\n", minfo.nz_used);
-
       KSPCreate(mpiCommGlobal, &ksp_schur_primal);
       KSPSetOperators(ksp_schur_primal, mat_schur_primal, mat_schur_primal,
 		      SAME_NONZERO_PATTERN);
@@ -822,8 +835,20 @@ namespace AMDiS {
       PCFactorSetMatSolverPackage(pc_schur_primal, MATSOLVERMUMPS);
       KSPSetFromOptions(ksp_schur_primal);
 
-      MSG("Creating Schur primal matrix needed %.5f seconds.\n",
-	  MPI::Wtime() - wtime);
+      if (printTimings) {
+	MatInfo minfo;
+	MatGetInfo(mat_schur_primal, MAT_GLOBAL_SUM, &minfo);
+	MSG("Schur primal matrix nnz = %f\n", minfo.nz_used);
+	
+	MSG("FETI-DP timing 06: %.5f seconds (creation of Schur primal matrix)\n",
+	    MPI::Wtime() - wtime);
+
+	wtime = MPI::Wtime();
+	KSPSetUp(ksp_schur_primal);
+	KSPSetUpOnBlocks(ksp_schur_primal);
+	MSG("FETI-DP timing 07: %.5f seconds (factorization of Schur primal matrix)\n",
+	    MPI::Wtime() - wtime);
+      }
     }
   }
 
@@ -851,7 +876,7 @@ namespace AMDiS {
     // === Create FETI-DP solver object. ===
 
     fetiData.mat_lagrange = &mat_lagrange;
-    fetiData.subSolver = subDomainSolver;
+    fetiData.subSolver = subdomain;
     fetiData.ksp_schur_primal = &ksp_schur_primal;
 
     VecCreateMPI(mpiCommGlobal, 
@@ -942,6 +967,17 @@ namespace AMDiS {
       PCSetType(precon_feti, PCSHELL);
       PCShellSetContext(precon_feti, static_cast<void*>(&fetiDirichletPreconData));
       PCShellSetApply(precon_feti, petscApplyFetiDirichletPrecon);
+
+
+      // If we want to print the timings, we force the LU factorization
+      // of the local problems to be done here explicitly.
+      if (printTimings) {
+	double wtime = MPI::Wtime();
+	KSPSetUp(ksp_interior);
+	KSPSetUpOnBlocks(ksp_interior);
+	MSG("FETI-DP timing 08: %.5f seconds (factorization of Dirichlet preconditioner matrices)\n",
+	    MPI::Wtime() - wtime);
+      }
       
       break;
 
@@ -1142,13 +1178,21 @@ namespace AMDiS {
     createFetiData();
     
     // === Create matrices for the FETI-DP method. ===
-    
-    subDomainSolver->fillPetscMatrix(mat);
+   
+    double wtime = MPI::Wtime();
+
+    subdomain->fillPetscMatrix(mat);
+
+    if (printTimings)
+      MSG("FETI-DP timing 02: %.5f seconds (creation of interior matrices)\n",
+	  MPI::Wtime() - wtime);
    
     
     // === Create matrices for FETI-DP preconditioner. ===
     
     if (fetiPreconditioner != FETI_NONE) {
+      wtime = MPI::Wtime();
+
       int nRowsDual = dualDofMap.getRankDofs();
 
       MatCreateSeqAIJ(PETSC_COMM_SELF,
@@ -1298,8 +1342,24 @@ namespace AMDiS {
 	MatAssemblyBegin(mat_duals_interior, MAT_FINAL_ASSEMBLY);
 	MatAssemblyEnd(mat_duals_interior, MAT_FINAL_ASSEMBLY);
       }
+
+      if (printTimings)
+	MSG("FETI-DP timing 03: %.5f seconds (creation of preconditioner matrices)\n",
+	    MPI::Wtime() - wtime);
     }
 
+
+    // If we want to print the timings, we force the LU factorization
+    // of the local problems to be done here explicitly.
+    if (printTimings) {
+      wtime = MPI::Wtime();
+      KSPSetUp(subdomain->getSolver());
+      KSPSetUpOnBlocks(subdomain->getSolver());
+      MSG("FETI-DP timing 04: %.5f seconds (factorization of subdomain matrices)\n",
+	  MPI::Wtime() - wtime);
+    }
+    
+
     // === Create and fill PETSc matrix for Lagrange constraints. ===
 
     createMatLagrange(feSpaces);
@@ -1320,7 +1380,7 @@ namespace AMDiS {
   {
     FUNCNAME("PetscSolverFeti::fillPetscRhs()");
 
-    subDomainSolver->fillPetscRhs(vec);
+    subdomain->fillPetscRhs(vec);
   }
 
   
@@ -1364,41 +1424,56 @@ namespace AMDiS {
 
     // === Create new rhs ===
 
+    double wtime = MPI::Wtime();
+
     //    d = L inv(K_BB) f_B - L inv(K_BB) K_BPi inv(S_PiPi) [f_Pi - K_PiB inv(K_BB) f_B]
     // vec_rhs = L * inv(K_BB) * f_B
-    subDomainSolver->solveGlobal(subDomainSolver->getRhsInterior(), tmp_b0);
+    subdomain->solveGlobal(subdomain->getRhsInterior(), tmp_b0);
     MatMult(mat_lagrange, tmp_b0, vec_rhs);
 
     // tmp_primal0 = M_PiB * inv(K_BB) * f_B
-    MatMult(subDomainSolver->getMatCoarseInt(), tmp_b0, tmp_primal0);
+    MatMult(subdomain->getMatCoarseInt(), tmp_b0, tmp_primal0);
 
     // tmp_primal0 = f_Pi - M_PiB * inv(K_BB) * f_B
-    VecAXPBY(tmp_primal0, 1.0, -1.0, subDomainSolver->getRhsCoarseSpace());
+    VecAXPBY(tmp_primal0, 1.0, -1.0, subdomain->getRhsCoarseSpace());
 
     // tmp_primal0 = inv(S_PiPi) (f_Pi - M_PiB * inv(K_BB) * f_B)
     KSPSolve(ksp_schur_primal, tmp_primal0, tmp_primal0);
 
     //
-    MatMult(subDomainSolver->getMatIntCoarse(), tmp_primal0, tmp_b0);
-    subDomainSolver->solveGlobal(tmp_b0, tmp_b0);
+    MatMult(subdomain->getMatIntCoarse(), tmp_primal0, tmp_b0);
+    subdomain->solveGlobal(tmp_b0, tmp_b0);
     MatMult(mat_lagrange, tmp_b0, tmp_lagrange0);
 
     //
     VecAXPBY(vec_rhs, -1.0, 1.0, tmp_lagrange0);
 
+    if (printTimings) {
+      MSG("FETI-DP timing 09: %.5f seconds (create rhs vector)\n", 
+	  MPI::Wtime() - wtime);
+      wtime = MPI::Wtime();
+    }
+
 
     // === Solve with FETI-DP operator. ===
     KSPSolve(ksp_feti, vec_rhs, vec_rhs);
 
+
+    if (printTimings) {
+      MSG("FETI-DP timing 10: %.5f seconds (application of FETI-DP operator)\n", 
+	  MPI::Wtime() - wtime);
+      wtime = MPI::Wtime();
+    }
+
     // === Solve for u_primals. ===
 
-    VecCopy(subDomainSolver->getRhsCoarseSpace(), tmp_primal0);
-    subDomainSolver->solveGlobal(subDomainSolver->getRhsInterior(), tmp_b0);
-    MatMult(subDomainSolver->getMatCoarseInt(), tmp_b0, tmp_primal1);
+    VecCopy(subdomain->getRhsCoarseSpace(), tmp_primal0);
+    subdomain->solveGlobal(subdomain->getRhsInterior(), tmp_b0);
+    MatMult(subdomain->getMatCoarseInt(), tmp_b0, tmp_primal1);
     VecAXPBY(tmp_primal0, -1.0, 1.0, tmp_primal1);
     MatMultTranspose(mat_lagrange, vec_rhs, tmp_b0);
-    subDomainSolver->solveGlobal(tmp_b0, tmp_b0);
-    MatMult(subDomainSolver->getMatCoarseInt(), tmp_b0, tmp_primal1);
+    subdomain->solveGlobal(tmp_b0, tmp_b0);
+    MatMult(subdomain->getMatCoarseInt(), tmp_b0, tmp_primal1);
 
     VecAXPBY(tmp_primal0, 1.0, 1.0, tmp_primal1);
     KSPSolve(ksp_schur_primal, tmp_primal0, tmp_primal0);
@@ -1406,18 +1481,23 @@ namespace AMDiS {
     
     // === Solve for u_b. ===
 
-    VecCopy(subDomainSolver->getRhsInterior(), tmp_b0);
+    VecCopy(subdomain->getRhsInterior(), tmp_b0);
     MatMultTranspose(mat_lagrange, vec_rhs, tmp_b1);
     VecAXPBY(tmp_b0, -1.0, 1.0, tmp_b1);
 
-    MatMult(subDomainSolver->getMatIntCoarse(), tmp_primal0, tmp_b1);
+    MatMult(subdomain->getMatIntCoarse(), tmp_primal0, tmp_b1);
     VecAXPBY(tmp_b0, -1.0, 1.0, tmp_b1);
-    subDomainSolver->solveGlobal(tmp_b0, tmp_b0);
+    subdomain->solveGlobal(tmp_b0, tmp_b0);
 
     // === And recover AMDiS solution vectors. ===
 
     recoverSolution(tmp_b0, tmp_primal0, vec);
 
+    if (printTimings) {
+      MSG("FETI-DP timing 11: %.5f seconds (Inner solve and solution recovery)\n", 
+	  MPI::Wtime() - wtime);
+    }
+
     VecDestroy(&vec_rhs);
     VecDestroy(&tmp_b0);
     VecDestroy(&tmp_b1);
@@ -1448,7 +1528,7 @@ namespace AMDiS {
 
     destroyFetiKsp();
 
-    subDomainSolver->destroyMatrixData();
+    subdomain->destroyMatrixData();
   }
 
 
@@ -1456,7 +1536,7 @@ namespace AMDiS {
   {
     FUNCNAME("PetscSolverFeti::destroyVectorData()");
 
-    subDomainSolver->destroyVectorData();
+    subdomain->destroyVectorData();
   }
 
 
diff --git a/AMDiS/src/parallel/PetscSolverFeti.h b/AMDiS/src/parallel/PetscSolverFeti.h
index 9529c1bc9da267e090324afe178e0c0392c02d5a..0a3cf2771876b14582ee641425306b37f8ee5551 100644
--- a/AMDiS/src/parallel/PetscSolverFeti.h
+++ b/AMDiS/src/parallel/PetscSolverFeti.h
@@ -258,13 +258,15 @@ namespace AMDiS {
 
     bool multiLevelTest;
 
-    PetscSolver *subDomainSolver;
+    PetscSolver *subdomain;
 
     int meshLevel;
 
     int rStartInterior;
 
     int nGlobalOverallInterior;
+
+    bool printTimings;
   };
 
 }
diff --git a/AMDiS/src/parallel/PetscSolverGlobalMatrix.cc b/AMDiS/src/parallel/PetscSolverGlobalMatrix.cc
index 88b98f4ff630f24f712ce9197ade40f074688896..21a1984205d2b37559fb06f54bdd10de2e4060bb 100644
--- a/AMDiS/src/parallel/PetscSolverGlobalMatrix.cc
+++ b/AMDiS/src/parallel/PetscSolverGlobalMatrix.cc
@@ -332,14 +332,16 @@ namespace AMDiS {
     TEST_EXIT_DBG(interiorMap)("No parallel DOF map defined!\n");
     
 
+    // === Transfer values from DOF vector to the PETSc vector. === 
     if (coarseSpaceMap) {
-      fillPetscRhsWithCoarseSpace(vec);
+      for (int i = 0; i < vec->getSize(); i++)
+	setDofVector(rhsInterior, rhsCoarseSpace, vec->getDOFVector(i), i);
     } else {
-      // === Transfer values from DOF vector to the PETSc vector. === 
       for (int i = 0; i < vec->getSize(); i++)
 	setDofVector(rhsInterior, vec->getDOFVector(i), i);
     }
 
+
     VecAssemblyBegin(rhsInterior);
     VecAssemblyEnd(rhsInterior);
 
@@ -360,27 +362,6 @@ namespace AMDiS {
   }
 
 
-  void PetscSolverGlobalMatrix::fillPetscRhsWithCoarseSpace(SystemVector *vec)
-  {
-    FUNCNAME("SubDomainSolver::fillPetscRhs()");
-
-    for (int i = 0; i < vec->getSize(); i++) {
-      const FiniteElemSpace *feSpace = vec->getDOFVector(i)->getFeSpace();
-      DOFVector<double>::Iterator dofIt(vec->getDOFVector(i), USED_DOFS);
-      for (dofIt.reset(); !dofIt.end(); ++dofIt) {
-	int index = dofIt.getDOFIndex();
-	if (isCoarseSpace(feSpace, index)) {	  
-	  index = coarseSpaceMap->getMatIndex(i, index);
-	  VecSetValue(rhsCoarseSpace, index, *dofIt, ADD_VALUES);
-	} else {
-	  index = interiorMap->getMatIndex(i, index) + rStartInterior;
-	  VecSetValue(rhsInterior, index, *dofIt, ADD_VALUES);
-	}      
-      }
-    }
-  }
-
-
   void PetscSolverGlobalMatrix::solvePetscMatrix(SystemVector &vec, 
 						 AdaptInfo *adaptInfo)
   {
@@ -704,7 +685,8 @@ namespace AMDiS {
   }
 
 
-  void PetscSolverGlobalMatrix::setDofVector(Vec& petscVec, 
+  void PetscSolverGlobalMatrix::setDofVector(Vec vecInterior, 
+					     Vec vecCoarse,
 					     DOFVector<double>* vec, 
 					     int nRowVec, 
 					     bool rankOnly)
@@ -726,24 +708,38 @@ namespace AMDiS {
       // Get PETSc's mat index of the row DOF.
       int index = 0;
       if (interiorMap->isMatIndexFromGlobal())
-	index = interiorMap->getMatIndex(nRowVec, globalRowDof);
+	index = 
+	  interiorMap->getMatIndex(nRowVec, globalRowDof) + rStartInterior;
       else
-	index = interiorMap->getMatIndex(nRowVec, dofIt.getDOFIndex());
+	index =
+	  interiorMap->getMatIndex(nRowVec, dofIt.getDOFIndex()) + rStartInterior;
 
       if (perMap.isPeriodic(feSpace, globalRowDof)) {
-	std::set<int>& perAsc = perMap.getAssociations(feSpace, globalRowDof);
-	double value = *dofIt / (perAsc.size() + 1.0);
-	VecSetValue(petscVec, index, value, ADD_VALUES);
-
-	for (std::set<int>::iterator perIt = perAsc.begin(); 
-	     perIt != perAsc.end(); ++perIt) {
-	  int mappedDof = perMap.map(feSpace, *perIt, globalRowDof);
-	  int mappedIndex = interiorMap->getMatIndex(nRowVec, mappedDof);
-	  VecSetValue(petscVec, mappedIndex, value, ADD_VALUES);
+	if (coarseSpaceMap && isCoarseSpace(feSpace, dofIt.getDOFIndex())) {
+	  ERROR_EXIT("Periodic coarse space not yet supported!\n");
+	} else {
+	  std::set<int>& perAsc = perMap.getAssociations(feSpace, globalRowDof);
+	  double value = *dofIt / (perAsc.size() + 1.0);
+	  VecSetValue(vecInterior, index, value, ADD_VALUES);
+	  
+	  for (std::set<int>::iterator perIt = perAsc.begin(); 
+	       perIt != perAsc.end(); ++perIt) {
+	    int mappedDof = perMap.map(feSpace, *perIt, globalRowDof);
+	    int mappedIndex = interiorMap->getMatIndex(nRowVec, mappedDof);
+	    VecSetValue(vecInterior, mappedIndex, value, ADD_VALUES);
+	  }
 	}
-      } else {
+      } else {	  
 	// The DOF index is not periodic.
-	VecSetValue(petscVec, index, *dofIt, ADD_VALUES);
+
+	if (coarseSpaceMap && isCoarseSpace(feSpace, dofIt.getDOFIndex())) {
+	  TEST_EXIT_DBG(vecCoarse != PETSC_NULL)("Coarse space DOF, but no coarse space vector given!\n");
+
+	  index = coarseSpaceMap->getMatIndex(nRowVec, dofIt.getDOFIndex());
+	  VecSetValue(vecCoarse, index, *dofIt, ADD_VALUES);
+	} else {
+	  VecSetValue(vecInterior, index, *dofIt, ADD_VALUES);
+	}
       }
     }
   }
diff --git a/AMDiS/src/parallel/PetscSolverGlobalMatrix.h b/AMDiS/src/parallel/PetscSolverGlobalMatrix.h
index caf3c9764c1fe98c4b1e279a5e152001237735c5..94d2117b46bb9026a775ae2256275be80a3a9edf 100644
--- a/AMDiS/src/parallel/PetscSolverGlobalMatrix.h
+++ b/AMDiS/src/parallel/PetscSolverGlobalMatrix.h
@@ -54,8 +54,6 @@ namespace AMDiS {
 
     void fillPetscRhs(SystemVector *vec);
 
-    void fillPetscRhsWithCoarseSpace(SystemVector *vec);
-
     void solvePetscMatrix(SystemVector &vec, AdaptInfo *adaptInfo);    
 
     void solveGlobal(Vec &rhs, Vec &sol);
@@ -72,9 +70,19 @@ namespace AMDiS {
     void setDofMatrix(DOFMatrix* mat, int nRowMat = 0, int nColMat = 0);
 
     /// Takes a DOF vector and sends its values to a given PETSc vector.
-    void setDofVector(Vec& petscVec, DOFVector<double>* vec, 
+    void setDofVector(Vec vecInterior,
+		      Vec vecCoarse,
+		      DOFVector<double>* vec, 
 		      int nRowVec, bool rankOnly = false);
 
+    inline void setDofVector(Vec vecInterior,
+			     DOFVector<double>* vec, 
+			     int nRowVec, bool rankOnly = false)
+    {
+      setDofVector(vecInterior, PETSC_NULL, vec, nRowVec, rankOnly);
+    }
+
+
   protected:
     /// Arrays definig the non zero pattern of Petsc's matrix.
     int *d_nnz, *o_nnz;