#include <config.h>
#include <array>
#include <vector>
#include <fstream>

#include <iostream>
#include <dune/common/indices.hh>
#include <dune/common/bitsetvector.hh>
#include <dune/common/parametertree.hh>
#include <dune/common/parametertreeparser.hh>
#include <dune/common/float_cmp.hh>
#include <dune/common/math.hh>

#include <dune/geometry/quadraturerules.hh>

#include <dune/grid/uggrid.hh>
#include <dune/grid/yaspgrid.hh>
#include <dune/grid/io/file/vtk/subsamplingvtkwriter.hh>

#include <dune/istl/matrix.hh>
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/multitypeblockmatrix.hh>
#include <dune/istl/multitypeblockvector.hh>
#include <dune/istl/matrixindexset.hh>
#include <dune/istl/solvers.hh>
#include <dune/istl/spqr.hh>
#include <dune/istl/preconditioners.hh>
#include <dune/istl/io.hh>

#include <dune/functions/functionspacebases/interpolate.hh>

#include <dune/functions/backends/istlvectorbackend.hh>
#include <dune/functions/functionspacebases/powerbasis.hh>
#include <dune/functions/functionspacebases/compositebasis.hh>

#include <dune/functions/functionspacebases/lagrangebasis.hh>
#include <dune/functions/functionspacebases/periodicbasis.hh>

#include <dune/functions/functionspacebases/subspacebasis.hh>
#include <dune/functions/functionspacebases/boundarydofs.hh>
#include <dune/functions/gridfunctions/discreteglobalbasisfunction.hh>
#include <dune/functions/gridfunctions/gridviewfunction.hh>

#include <dune/common/fvector.hh>
#include <dune/common/fmatrix.hh>

#include <dune/microstructure/prestrain_material_geometry.hh>
#include <dune/microstructure/matrix_operations.hh>
#include <dune/microstructure/vtk_filler.hh>    //TEST


#include <dune/functions/functionspacebases/hierarchicvectorwrapper.hh>

// #include <dune/fufem/discretizationerror.hh>
// #include <boost/multiprecision/cpp_dec_float.hpp>
// #include <boost/any.hpp>

#include <any>
#include <variant>
#include <string>
#include <iomanip>   // needed when working with relative paths e.g. from python-scripts

using namespace Dune;
using namespace MatrixOperations;


//////////////////////////////////////////////////////////////////////
// Helper functions for Table-Output
//////////////////////////////////////////////////////////////////////
/*! Center-aligns string within a field of width w. Pads with blank spaces
    to enforce alignment. */
std::string center(const std::string s, const int w) {
    std::stringstream ss, spaces;
    int padding = w - s.size();                 // count excess room to pad
    for(int i=0; i<padding/2; ++i)
        spaces << " ";
    ss << spaces.str() << s << spaces.str();    // format with padding
    if(padding>0 && padding%2!=0)               // if odd #, add 1 space
        ss << " ";
    return ss.str();
}

/* Convert double to string with specified number of places after the decimal
   and left padding. */
template<class type>
std::string prd(const type x, const int decDigits, const int width) {
    std::stringstream ss;
//     ss << std::fixed << std::right;
    ss << std::scientific << std::right;                     // Use scientific Output!
    ss.fill(' ');        // fill space around displayed #
    ss.width(width);     // set  width around displayed #
    ss.precision(decDigits); // set # places after decimal
    ss << x;
    return ss.str();
}





template<class Basis>
auto arbitraryComponentwiseIndices(const Basis& basis,
                                   const int elementNumber,
                                   const int leafIdx
                                   )
{
  // (Local Approach -- works for non Lagrangian-Basis too)
  // Input  : elementNumber & localIdx
  // Output : determine all Component-Indices that correspond to that basis-function
  auto localView = basis.localView();

  FieldVector<int,3> row;
  int elementCounter = 0;

  for (const auto& element : elements(basis.gridView()))
  {
    if(elementCounter == elementNumber)             // get arbitraryIndex(global) for each Component        ..alternativ:gridView.indexSet
    {
      localView.bind(element);

      for (int k = 0; k < 3; k++)
      {
        auto rowLocal = localView.tree().child(k).localIndex(leafIdx);    //Input: LeafIndex! TODO bräuchte hier (Inverse )  Local-to-Leaf Idx Map
        row[k] = localView.index(rowLocal);
        //         std::cout << "rowLocal:" << rowLocal << std::endl;
        //         std::cout << "row[k]:" << row[k] << std::endl;
      }
    }
    elementCounter++;
  }
  return row;
}




template<class LocalView, class Matrix, class localFunction1, class localFunction2>
void computeElementStiffnessMatrix(const LocalView& localView,
                                   Matrix& elementMatrix,
                                   const localFunction1& mu,
                                   const localFunction2& lambda,
                                   const double gamma
                                   )
{
  // Local StiffnessMatrix of the form:
  // | phi*phi    m*phi |
  // | phi *m     m*m   |
  using Element = typename LocalView::Element;
  const Element element = localView.element();
  auto geometry = element.geometry();
  constexpr int dim = Element::dimension;
  constexpr int dimWorld = dim;
  using MatrixRT = FieldMatrix< double, dimWorld, dimWorld>;

  elementMatrix.setSize(localView.size()+3, localView.size()+3);         //extend by dim ´R_sym^{2x2}
  elementMatrix = 0;

  // LocalBasis-Offset
  const int localPhiOffset = localView.size();

  const auto& localFiniteElement = localView.tree().child(0).finiteElement();     
  const auto nSf = localFiniteElement.localBasis().size();
//   std::cout << "localView.size(): " << localView.size() << std::endl;
//   std::cout << "nSf : " << nSf << std::endl;

  ///////////////////////////////////////////////
  // Basis for R_sym^{2x2}            // wird nicht als Funktion benötigt da konstant...
  //////////////////////////////////////////////
  MatrixRT G_1 {{1.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0, 0.0}};
  MatrixRT G_2 {{0.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0, 0.0, 0.0}};
  MatrixRT G_3 {{0.0, 1.0/sqrt(2.0), 0.0}, {1.0/sqrt(2.0), 0.0, 0.0}, {0.0, 0.0, 0.0}};
  std::array<MatrixRT,3 > basisContainer = {G_1, G_2, G_3};

//   int orderQR = 2*(dim*localFiniteElement.localBasis().order()-1)+5;  // TEST
  int orderQR = 2*(dim*localFiniteElement.localBasis().order()-1);
  const auto& quad = QuadratureRules<double,dim>::rule(element.type(), orderQR);
  
  
  
//   std::cout << "Print QuadratureOrder:" << orderQR << std::endl;  //TEST

  for (const auto& quadPoint : quad)
  {
    const auto& quadPos = quadPoint.position();
    const auto jacobianInverseTransposed = geometry.jacobianInverseTransposed(quadPos);
    const auto integrationElement = geometry.integrationElement(quadPos);

    std::vector< FieldMatrix< double, 1, dim> > referenceGradients;
    localFiniteElement.localBasis().evaluateJacobian(quadPos,
                                                     referenceGradients);
    // Compute the shape function gradients on the grid element
    std::vector<FieldVector<double,dim> > gradients(referenceGradients.size());

    for (size_t i=0; i<gradients.size(); i++)
      jacobianInverseTransposed.mv(referenceGradients[i][0], gradients[i]);

    for (size_t l=0; l< dimWorld; l++)
      for (size_t j=0; j < nSf; j++ )
      {
        size_t row = localView.tree().child(l).localIndex(j);
        // (scaled)  Deformation gradient of the test basis function
        MatrixRT defGradientV(0);
        defGradientV[l][0] = gradients[j][0];                       // Y
        defGradientV[l][1] = gradients[j][1];                       //X2
        defGradientV[l][2] = (1.0/gamma)*gradients[j][2];           //X3
        // "phi*phi"-part
        for (size_t k=0; k < dimWorld; k++)
          for (size_t i=0; i < nSf; i++)
          {
            // (scaled) Deformation gradient of the ansatz basis function
            MatrixRT defGradientU(0);
            defGradientU[k][0] = gradients[i][0];                       // Y
            defGradientU[k][1] = gradients[i][1];                       //X2
            defGradientU[k][2] = (1.0/gamma)*gradients[i][2];           //X3
            // printmatrix(std::cout, defGradientU , "defGradientU", "--");

            double energyDensity = linearizedStVenantKirchhoffDensity(mu(quadPos), lambda(quadPos), defGradientU, defGradientV);
//             double energyDensity = generalizedDensity(mu(quadPos), lambda(quadPos), defGradientU, defGradientV);  // also works..

            size_t col = localView.tree().child(k).localIndex(i);                       
            
            elementMatrix[row][col] += energyDensity * quadPoint.weight() * integrationElement;
           }
            
           // "m*phi"  & "phi*m" - part
           for( size_t m=0; m<3; m++)
           {
               double energyDensityGphi = linearizedStVenantKirchhoffDensity(mu(quadPos), lambda(quadPos), basisContainer[m], defGradientV);
               auto value = energyDensityGphi * quadPoint.weight() * integrationElement;
               elementMatrix[row][localPhiOffset+m] += value;
               elementMatrix[localPhiOffset+m][row] += value;
            }
          
      }
    // "m*m"-part
    for(size_t m=0; m<3; m++)
      for(size_t n=0; n<3; n++)
      {
        double energyDensityGG = linearizedStVenantKirchhoffDensity(mu(quadPos), lambda(quadPos), basisContainer[m], basisContainer[n]);
        elementMatrix[localPhiOffset+m][localPhiOffset+n]= energyDensityGG * quadPoint.weight() * integrationElement;
      }
  }
}



// Get the occupation pattern of the stiffness matrix
template<class Basis, class ParameterSet>
void getOccupationPattern(const Basis& basis, MatrixIndexSet& nb, ParameterSet& parameterSet)
{
  //  OccupationPattern:
  // | phi*phi    m*phi |
  // | phi *m     m*m   |

  auto localView = basis.localView();
  const int phiOffset = basis.dimension();

  nb.resize(basis.size()+3, basis.size()+3);

  for (const auto& element : elements(basis.gridView()))
  {
    localView.bind(element);
    for (size_t i=0; i<localView.size(); i++)
    {
      // The global index of the i-th vertex of the element
      auto row = localView.index(i);
      for (size_t j=0; j<localView.size(); j++ )
      {
        // The global index of the j-th vertex of the element
        auto col = localView.index(j);
        nb.add(row[0],col[0]);                   // nun würde auch nb.add(row,col) gehen..
      }
    }
    for (size_t i=0; i<localView.size(); i++)
    {
      auto row = localView.index(i);

      for (size_t j=0; j<3; j++ )
      {
        nb.add(row,phiOffset+j);               // m*phi part of StiffnessMatrix
        nb.add(phiOffset+j,row);               // phi*m part of StiffnessMatrix
      }
    }
    for (size_t i=0; i<3; i++ )
      for (size_t j=0; j<3; j++ )
      {
        nb.add(phiOffset+i,phiOffset+j);       // m*m part of StiffnessMatrix
      }
  }
  //////////////////////////////////////////////////////////////////
  // setOneBaseFunctionToZero
  //////////////////////////////////////////////////////////////////
  if(parameterSet.template get<bool>("set_oneBasisFunction_Zero ", true)){
    FieldVector<int,3> row;
    unsigned int arbitraryLeafIndex =  parameterSet.template get<unsigned int>("arbitraryLeafIndex", 0);
    unsigned int arbitraryElementNumber =  parameterSet.template get<unsigned int>("arbitraryElementNumber", 0);

    const auto& localFiniteElement = localView.tree().child(0).finiteElement();    // macht keinen Unterschied ob hier k oder 0..
    const auto nSf = localFiniteElement.localBasis().size();
    assert(arbitraryLeafIndex < nSf );
    assert(arbitraryElementNumber  < basis.gridView().size(0));   // "arbitraryElementNumber larger than total Number of Elements"

    //Determine 3 global indices (one for each component of an arbitrary local FE-function)
    row = arbitraryComponentwiseIndices(basis,arbitraryElementNumber,arbitraryLeafIndex);

    for (int k = 0; k<3; k++)
      nb.add(row[k],row[k]);
  }
}



// Compute the source term for a single element
// < L (sym[D_gamma*nabla phi_i] + M_i ), x_3G_alpha >
template<class LocalView, class LocalFunction1, class LocalFunction2, class Vector, class Force>
void computeElementLoadVector( const LocalView& localView,
                               LocalFunction1& mu,
                               LocalFunction2& lambda,
                               const double gamma,
                               Vector& elementRhs,
                               const Force& forceTerm
                               )
{
  // | f*phi|
  // | ---  |
  // | f*m  |
  using Element = typename LocalView::Element;
  const auto element = localView.element();
  const auto geometry = element.geometry();
  constexpr int dim = Element::dimension;
  constexpr int dimWorld = dim;

  using MatrixRT = FieldMatrix< double, dimWorld, dimWorld>;

  // Set of shape functions for a single element
  const auto& localFiniteElement= localView.tree().child(0).finiteElement();
  const auto nSf = localFiniteElement.localBasis().size();

  elementRhs.resize(localView.size() +3);
  elementRhs = 0;

  // LocalBasis-Offset
  const int localPhiOffset = localView.size();

  ///////////////////////////////////////////////
  // Basis for R_sym^{2x2}
  //////////////////////////////////////////////
  MatrixRT G_1 {{1.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0, 0.0}};
  MatrixRT G_2 {{0.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0, 0.0, 0.0}};
  MatrixRT G_3 {{0.0, 1.0/sqrt(2.0), 0.0}, {1.0/sqrt(2.0), 0.0, 0.0}, {0.0, 0.0, 0.0}};
  std::array<MatrixRT,3 > basisContainer = {G_1, G_2, G_3};

//   int orderQR = 2*(dim*localFiniteElement.localBasis().order()-1)+5;  // TEST
  int orderQR = 2*(dim*localFiniteElement.localBasis().order()-1);   // für simplex
  const auto& quad = QuadratureRules<double,dim>::rule(element.type(), orderQR);

  for (const auto& quadPoint : quad)
  {
    const FieldVector<double,dim>& quadPos = quadPoint.position();
    const auto jacobian = geometry.jacobianInverseTransposed(quadPos);
    const double integrationElement = geometry.integrationElement(quadPos);

    std::vector<FieldMatrix<double,1,dim> > referenceGradients;

    localFiniteElement.localBasis().evaluateJacobian(quadPos, referenceGradients);

    std::vector< FieldVector< double, dim> > gradients(referenceGradients.size());  
    for (size_t i=0; i< gradients.size(); i++)
      jacobian.mv(referenceGradients[i][0], gradients[i]);

    // "f*phi"-part
    for (size_t i=0; i < nSf; i++)
      for (size_t k=0; k < dimWorld; k++)
      {
        // Deformation gradient of the ansatz basis function
        MatrixRT defGradientV(0);
        defGradientV[k][0] = gradients[i][0];                                 // Y
        defGradientV[k][1] = gradients[i][1];                                 // X2
        defGradientV[k][2] = (1.0/gamma)*gradients[i][2];                     // X3

        double energyDensity = linearizedStVenantKirchhoffDensity(mu(quadPos), lambda(quadPos), defGradientV, forceTerm(quadPos));
        size_t row = localView.tree().child(k).localIndex(i);
        elementRhs[row] += energyDensity * quadPoint.weight() * integrationElement;
      }

    // "f*m"-part
    for (size_t m=0; m<3; m++)
    {
      double energyDensityfG = linearizedStVenantKirchhoffDensity(mu(quadPos), lambda(quadPos), basisContainer[m], forceTerm(quadPos));
      elementRhs[localPhiOffset+m] += energyDensityfG * quadPoint.weight() * integrationElement;
    }
  }
}



template<class Basis, class Matrix, class LocalFunction1, class LocalFunction2, class ParameterSet>
void assembleCellStiffness(const Basis& basis,
                           LocalFunction1& muLocal,
                           LocalFunction2& lambdaLocal,
                           const double gamma,
                           Matrix& matrix,
                           ParameterSet& parameterSet
                           )
{
  std::cout << "assemble Stiffness-Matrix begins." << std::endl;

  MatrixIndexSet occupationPattern;
  getOccupationPattern(basis, occupationPattern, parameterSet);
  occupationPattern.exportIdx(matrix);
  matrix = 0;

  auto localView = basis.localView();
  const int phiOffset = basis.dimension();

  for (const auto& element : elements(basis.gridView()))
  {
    localView.bind(element);
    muLocal.bind(element);
    lambdaLocal.bind(element);
    const int localPhiOffset = localView.size();
//     Dune::Timer Time;
    //std::cout << "localPhiOffset : " << localPhiOffset << std::endl;
    Dune::Matrix<FieldMatrix<double,1,1> > elementMatrix;
    computeElementStiffnessMatrix(localView, elementMatrix, muLocal, lambdaLocal, gamma);
    
//     std::cout << "local assembly time:" << Time.elapsed() << std::endl;
    
    //printmatrix(std::cout, elementMatrix, "ElementMatrix", "--");
    //std::cout << "elementMatrix.N() : " << elementMatrix.N() << std::endl;
    //std::cout << "elementMatrix.M() : " << elementMatrix.M() << std::endl;

    //////////////////////////////////////////////////////////////////////////////
    // GLOBAL STIFFNES ASSEMBLY
    //////////////////////////////////////////////////////////////////////////////
    for (size_t i=0; i<localPhiOffset; i++)
    for (size_t j=0; j<localPhiOffset; j++ )
    {
        auto row = localView.index(i);
        auto col = localView.index(j);
        matrix[row][col] += elementMatrix[i][j];
    }
    for (size_t i=0; i<localPhiOffset; i++)
    for(size_t m=0; m<3; m++)
    {
        auto row = localView.index(i);
        matrix[row][phiOffset+m] += elementMatrix[i][localPhiOffset+m];
        matrix[phiOffset+m][row] += elementMatrix[localPhiOffset+m][i];
    }
    for (size_t m=0; m<3; m++ )
    for (size_t n=0; n<3; n++ )
        matrix[phiOffset+m][phiOffset+n] += elementMatrix[localPhiOffset+m][localPhiOffset+n];

    //  printmatrix(std::cout, matrix, "StiffnessMatrix", "--");
  }
}


template<class Basis, class LocalFunction1, class LocalFunction2, class Vector, class Force>
void assembleCellLoad(const Basis& basis,
                      LocalFunction1& muLocal,
                      LocalFunction2& lambdaLocal,
                      const double gamma,
                      Vector& b,
                      const Force& forceTerm
                      )
{
  //     std::cout << "assemble load vector." << std::endl;
  b.resize(basis.size()+3);
  b = 0;

  auto localView = basis.localView();
  const int phiOffset = basis.dimension();

  // Transform G_alpha's to GridViewFunctions/LocalFunctions 
  auto loadGVF  = Dune::Functions::makeGridViewFunction(forceTerm, basis.gridView());
  auto loadFunctional = localFunction(loadGVF);      

  for (const auto& element : elements(basis.gridView()))
  {
    localView.bind(element);
    muLocal.bind(element);
    lambdaLocal.bind(element);
    loadFunctional.bind(element);

    const int localPhiOffset = localView.size();
    //         std::cout << "localPhiOffset : " << localPhiOffset << std::endl;

    BlockVector<FieldVector<double,1> > elementRhs;
    computeElementLoadVector(localView, muLocal, lambdaLocal, gamma, elementRhs, loadFunctional);
//     printvector(std::cout, elementRhs, "elementRhs", "--");
//     printvector(std::cout, elementRhs, "elementRhs", "--");
    //////////////////////////////////////////////////////////////////////////////
    // GLOBAL LOAD ASSEMBLY
    //////////////////////////////////////////////////////////////////////////////
    for (size_t p=0; p<localPhiOffset; p++)
    {
      auto row = localView.index(p);
      b[row] += elementRhs[p];
    }
    for (size_t m=0; m<3; m++ )
      b[phiOffset+m] += elementRhs[localPhiOffset+m];
      //printvector(std::cout, b, "b", "--");
  }
}



template<class Basis, class LocalFunction1, class LocalFunction2, class MatrixFunction>
auto energy(const Basis& basis,
            LocalFunction1& mu,
            LocalFunction2& lambda,
            const MatrixFunction& matrixFieldFuncA,
            const MatrixFunction& matrixFieldFuncB)
{

//   TEST HIGHER PRECISION
//   using float_50 = boost::multiprecision::cpp_dec_float_50;
//   float_50 higherPrecEnergy = 0.0;
  double energy = 0.0;
  constexpr int dim = Basis::LocalView::Element::dimension;
  constexpr int dimWorld = dim;

  auto localView = basis.localView();

  auto matrixFieldAGVF  = Dune::Functions::makeGridViewFunction(matrixFieldFuncA, basis.gridView());
  auto matrixFieldA = localFunction(matrixFieldAGVF);
  auto matrixFieldBGVF  = Dune::Functions::makeGridViewFunction(matrixFieldFuncB, basis.gridView());
  auto matrixFieldB = localFunction(matrixFieldBGVF);

  using GridView = typename Basis::GridView;
  using Domain = typename GridView::template Codim<0>::Geometry::GlobalCoordinate;
  using MatrixRT = FieldMatrix< double, dimWorld, dimWorld>;
//   TEST
//   FieldVector<double,3> testvector = {1.0 , 1.0 , 1.0};
//   printmatrix(std::cout, matrixFieldFuncB(testvector) , "matrixFieldB(testvector) ", "--");

  for (const auto& e : elements(basis.gridView()))
  {
    localView.bind(e);
    matrixFieldA.bind(e);
    matrixFieldB.bind(e);
    mu.bind(e);
    lambda.bind(e);

    double elementEnergy = 0.0;
    //double elementEnergy_HP = 0.0;

    auto geometry = e.geometry();
    const auto& localFiniteElement = localView.tree().child(0).finiteElement();

//     int orderQR = 2*(dim*localFiniteElement.localBasis().order()-1 + 5 );  // TEST
    int orderQR = 2*(dim*localFiniteElement.localBasis().order()-1);
    const QuadratureRule<double, dim>& quad = QuadratureRules<double, dim>::rule(e.type(), orderQR);

    for (const auto& quadPoint : quad) 
    {
      const auto& quadPos = quadPoint.position();
      const double integrationElement = geometry.integrationElement(quadPos);

      auto strain1 = matrixFieldA(quadPos);
      auto strain2 = matrixFieldB(quadPos);

      double energyDensity = linearizedStVenantKirchhoffDensity(mu(quadPos), lambda(quadPos), strain1, strain2);

      elementEnergy += energyDensity * quadPoint.weight() * integrationElement;          
      //elementEnergy_HP += energyDensity * quadPoint.weight() * integrationElement;
    }
    energy += elementEnergy;
    //higherPrecEnergy += elementEnergy_HP;
  }
//   TEST
//   std::cout << std::setprecision(std::numeric_limits<float_50>::digits10) << higherPrecEnergy << std::endl;
  return energy;
}



template<class Basis, class Matrix, class Vector, class ParameterSet>
void setOneBaseFunctionToZero(const Basis& basis,
                              Matrix& stiffnessMatrix,
                              Vector& load_alpha1,
                              Vector& load_alpha2,
                              Vector& load_alpha3,
                              ParameterSet& parameterSet
                              )
{
  std::cout << "Setting one Basis-function to zero" << std::endl;

  constexpr int dim = Basis::LocalView::Element::dimension;
  
  unsigned int arbitraryLeafIndex =  parameterSet.template get<unsigned int>("arbitraryLeafIndex", 0);
  unsigned int arbitraryElementNumber =  parameterSet.template get<unsigned int>("arbitraryElementNumber", 0);
  //Determine 3 global indices (one for each component of an arbitrary local FE-function)
  FieldVector<int,3> row = arbitraryComponentwiseIndices(basis,arbitraryElementNumber,arbitraryLeafIndex);

  for (int k = 0; k < dim; k++)
  {
    load_alpha1[row[k]] = 0.0;
    load_alpha2[row[k]] = 0.0;
    load_alpha3[row[k]] = 0.0;
    auto cIt    = stiffnessMatrix[row[k]].begin();
    auto cEndIt = stiffnessMatrix[row[k]].end();
    for (; cIt!=cEndIt; ++cIt)
      *cIt = (cIt.index()==row[k]) ? 1.0 : 0.0;
  }
}



template<class Basis>
auto childToIndexMap(const Basis& basis,
                     const int k
                     )
{
  // Input  : child/component
  // Output : determine all Indices that belong to that component
  auto localView = basis.localView();

  std::vector<int> r = { };
  //     for (int n: r)
  //         std::cout << n << ","<< std::endl;

  // Determine global indizes for each component k = 1,2,3.. in order to subtract correct (component of) integral Mean
  // (global) Indices that correspond to component k = 1,2,3
  for(const auto& element : elements(basis.gridView()))
  {
    localView.bind(element);
    const auto& localFiniteElement = localView.tree().child(k).finiteElement();        
    const auto nSf = localFiniteElement.localBasis().size();                           

    for(size_t j=0; j<nSf; j++)
    {
      auto Localidx = localView.tree().child(k).localIndex(j);  // local  indices
      r.push_back(localView.index(Localidx));                   // global indices
    }
  }
  // Delete duplicate elements
  // first remove consecutive (adjacent) duplicates
  auto last = std::unique(r.begin(), r.end());
  r.erase(last, r.end());
  // sort followed by unique, to remove all duplicates
  std::sort(r.begin(), r.end());
  last = std::unique(r.begin(), r.end());
  r.erase(last, r.end());

  return r;
}


template<class Basis, class Vector>
auto integralMean(const Basis& basis,
                  Vector& coeffVector
                  )
{
  // computes integral mean of given LocalFunction

  constexpr int dim = Basis::LocalView::Element::dimension;

  auto GVFunction = Functions::makeDiscreteGlobalBasisFunction<FieldVector<double,dim>>(basis,coeffVector);
  auto localfun = localFunction(GVFunction);

  auto localView = basis.localView();

  FieldVector<double,3> r = {0.0, 0.0, 0.0};
  double area = 0.0;

  // Compute Area integral & integral of FE-function
  for(const auto& element : elements(basis.gridView()))
  {
    localView.bind(element);
    localfun.bind(element);
    const auto& localFiniteElement = localView.tree().child(0).finiteElement();
    
//     int orderQR = 2*(dim*localFiniteElement.localBasis().order()-1)+5; //TEST
    int orderQR = 2*(dim*localFiniteElement.localBasis().order()-1);
    const QuadratureRule<double, dim>& quad = QuadratureRules<double, dim>::rule(element.type(), orderQR);

    for(const auto& quadPoint : quad)
    {
      const auto& quadPos = quadPoint.position();
      const double integrationElement = element.geometry().integrationElement(quadPos);
      area += quadPoint.weight() * integrationElement;
      
      r += localfun(quadPos) * quadPoint.weight() * integrationElement;
    }
  }
  //     std::cout << "Domain-Area: " << area << std::endl;
  return (1.0/area) * r ;
}


template<class Basis, class Vector>
auto subtractIntegralMean(const Basis& basis,
                          Vector& coeffVector
                          )
{
  // Substract correct Integral mean from each associated component function
  auto IM = integralMean(basis, coeffVector);

  constexpr int dim = Basis::LocalView::Element::dimension;

  for(size_t k=0; k<dim; k++)
  {
    //std::cout << "Integral-Mean: " << IM[k] << std::endl;
    auto idx = childToIndexMap(basis,k);

    for ( int i : idx)
      coeffVector[i] -= IM[k];
  }
}



//////////////////////////////////////////////////
//   Infrastructure for handling periodicity
//////////////////////////////////////////////////

// Check whether two points are equal on R/Z x R/Z x R
auto equivalent = [](const FieldVector<double,3>& x, const FieldVector<double,3>& y)
                {
                    return ( (FloatCmp::eq(x[0],y[0]) or FloatCmp::eq(x[0]+1,y[0]) or FloatCmp::eq(x[0]-1,y[0]))
                            and (FloatCmp::eq(x[1],y[1]) or FloatCmp::eq(x[1]+1,y[1]) or FloatCmp::eq(x[1]-1,y[1]))
                            and (FloatCmp::eq(x[2],y[2]))
                        );
                };


                
////////////////////////////////////////////////////////////// L2-ERROR /////////////////////////////////////////////////////////////////
template<class Basis, class Vector, class MatrixFunction>
double computeL2SymError(const Basis& basis,
                         Vector& coeffVector,
                         const double gamma,
                         const MatrixFunction& matrixFieldFunc)
{
  double error = 0.0;


  auto localView = basis.localView();
  

  constexpr int dim = Basis::LocalView::Element::dimension;             //TODO TEST 
  constexpr int dimWorld = 3;                                           // Hier auch möglich? 
  
  auto matrixFieldGVF  = Dune::Functions::makeGridViewFunction(matrixFieldFunc, basis.gridView());
  auto matrixField = localFunction(matrixFieldGVF);
  using MatrixRT = FieldMatrix< double, dimWorld, dimWorld>;

  for (const auto& element : elements(basis.gridView()))
  {
    localView.bind(element);
    matrixField.bind(element);
    auto geometry = element.geometry();

    const auto& localFiniteElement = localView.tree().child(0).finiteElement();             
    const auto nSf = localFiniteElement.localBasis().size();

//     int orderQR = 2*(dim*localFiniteElement.localBasis().order()-1)+5; //TEST
    int orderQR = 2*(dim*localFiniteElement.localBasis().order()-1 );  
    const auto& quad = QuadratureRules<double,dim>::rule(element.type(), orderQR);

    for (const auto& quadPoint : quad)
    {
        const auto& quadPos = quadPoint.position();
        const auto jacobianInverseTransposed = geometry.jacobianInverseTransposed(quadPos);
        const auto integrationElement = geometry.integrationElement(quadPos);

        std::vector< FieldMatrix<double, 1, dim>> referenceGradients;
        localFiniteElement.localBasis().evaluateJacobian(quadPos,referenceGradients);

        // Compute the shape function gradients on the grid element
        std::vector<FieldVector<double,dim>> gradients(referenceGradients.size());

        for (size_t i=0; i<gradients.size(); i++)
          jacobianInverseTransposed.mv(referenceGradients[i][0], gradients[i]);

        MatrixRT tmp(0);
        double sum = 0.0;

        for (size_t k=0; k < dimWorld; k++)
        for (size_t i=0; i < nSf; i++)
        {
            size_t localIdx = localView.tree().child(k).localIndex(i);  // hier i:leafIdx
            size_t globalIdx = localView.index(localIdx);

            // (scaled) Deformation gradient of the ansatz basis function
            MatrixRT defGradientU(0);
            defGradientU[k][0] = coeffVector[globalIdx]*gradients[i][0];                       // Y  //hier i:leafIdx
            defGradientU[k][1] = coeffVector[globalIdx]*gradients[i][1];                       //X2
            defGradientU[k][2] = coeffVector[globalIdx]*(1.0/gamma)*gradients[i][2];           //X3

            tmp += sym(defGradientU);
        }
//         printmatrix(std::cout, matrixField(quadPos), "matrixField(quadPos)", "--");
//         printmatrix(std::cout, tmp, "tmp", "--");                                    // TEST Symphi_1 hat andere Struktur als analytic?
//         tmp = tmp - matrixField(quadPos);
//         printmatrix(std::cout, tmp - matrixField(quadPos), "Difference", "--");
        for (int ii=0; ii<dimWorld; ii++)
        for (int jj=0; jj<dimWorld; jj++)
        {
            sum +=  std::pow(tmp[ii][jj]-matrixField(quadPos)[ii][jj],2);
        }
//         std::cout << "sum:" << sum << std::endl;
        error += sum * quadPoint.weight() * integrationElement;
//         std::cout << "error:" << error << std::endl;
    }
  }
  return sqrt(error);
}
////////////////////////////////////////////////////////////// L2-NORM /////////////////////////////////////////////////////////////////
template<class Basis, class Vector>
double computeL2Norm(const Basis& basis,
                     Vector& coeffVector)
{
  // IMPLEMENTATION with makeDiscreteGlobalBasisFunction
  double error = 0.0;

  constexpr int dim = Basis::LocalView::Element::dimension;
  constexpr int dimWorld = dim;
  auto localView = basis.localView();
  auto GVFunc = Functions::makeDiscreteGlobalBasisFunction<FieldVector<double,dim>>(basis,coeffVector);
  auto localfun = localFunction(GVFunc);

  for(const auto& element : elements(basis.gridView()))
  {
    localView.bind(element);
    localfun.bind(element);
    const auto& localFiniteElement = localView.tree().child(0).finiteElement();

    
//     int orderQR = 2*(dim*localFiniteElement.localBasis().order()-1)+5; //TEST
    int orderQR = 2*(dim*localFiniteElement.localBasis().order()-1);
    const QuadratureRule<double, dim>& quad = QuadratureRules<double, dim>::rule(element.type(), orderQR);

    for(const auto& quadPoint : quad)
    {
      const auto& quadPos = quadPoint.position();
      const double integrationElement = element.geometry().integrationElement(quadPos);
      error += localfun(quadPos)*localfun(quadPos) * quadPoint.weight() * integrationElement;
    }
  }
  return sqrt(error);
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
int main(int argc, char *argv[])
{
  MPIHelper::instance(argc, argv);
  
  Dune::Timer globalTimer;

  ParameterTree parameterSet;
  if (argc < 2)
    ParameterTreeParser::readINITree("../../inputs/cellsolver.parset", parameterSet);
  else
  {
    ParameterTreeParser::readINITree(argv[1], parameterSet);
    ParameterTreeParser::readOptions(argc, argv, parameterSet);
  }

  //--- Output setter
//   std::string outputPath = parameterSet.get("outputPath", "../../outputs/output.txt");
//   std::string outputPath = parameterSet.get("outputPath", "/home/klaus/Desktop/DUNE/dune-microstructure/outputs/output.txt");
  std::string outputPath = parameterSet.get("outputPath", "/home/klaus/Desktop/DUNE/dune-microstructure/outputs");
//   std::string MatlabPath = parameterSet.get("MatlabPath", "/home/klaus/Desktop/DUNE/dune-microstructure/Matlab-Programs");
//     std::string outputPath = "/home/klaus/Desktop/DUNE/dune-microstructure/outputs/output.txt";
  std::fstream log;
  log.open(outputPath + "/output.txt" ,std::ios::out);

  std::cout << "outputPath:" << outputPath << std::endl;
  
//   parameterSet.report(log); // short Alternativ
  
  
  constexpr int dim = 3;
  constexpr int dimWorld = 3;

  ///////////////////////////////////
  // Get Parameters/Data
  ///////////////////////////////////
  double gamma = parameterSet.get<double>("gamma",1.0);   // ratio dimension reduction to homogenization
  double alpha = parameterSet.get<double>("alpha", 2.0);
  double theta = parameterSet.get<double>("theta",1.0/4.0); 
  ///////////////////////////////////
  // Get Material Parameters
  ///////////////////////////////////
  std::string imp = parameterSet.get<std::string>("material_prestrain_imp", "analytical_Example");
  log << "material_prestrain used: "<< imp  << std::endl;
  double beta = parameterSet.get<double>("beta",2.0); 
  double mu1 = parameterSet.get<double>("mu1",1.0);;
  double mu2 = beta*mu1;
  double lambda1 = parameterSet.get<double>("lambda1",0.0);;
  double lambda2 = beta*lambda1;
  
  // Plate geometry settings
  double width = parameterSet.get<double>("width", 1.0);   //geometry cell, cross section
  //     double len  = parameterSet.get<double>("len", 10.0); //length
  //     double height  = parameterSet.get<double>("h", 0.1); //rod thickness
  //     double eps  = parameterSet.get<double>("eps", 0.1); //size of perioticity cell

  ///////////////////////////////////
  // Get Prestrain/Parameters
  ///////////////////////////////////
//   unsigned int prestraintype = parameterSet.get<unsigned int>("prestrainType", "analytical_Example");  //OLD 
//   std::string prestraintype = parameterSet.get<std::string>("prestrainType", "analytical_Example");
//   double rho1 = parameterSet.get<double>("rho1", 1.0);
//   double rho2 = alpha*rho1;
//   auto prestrainImp = PrestrainImp(rho1, rho2, theta, width);
//   auto B_Term = prestrainImp.getPrestrain(prestraintype);
  
  
  
  auto prestrainImp = PrestrainImp<dim>(); //NEW 
  auto B_Term = prestrainImp.getPrestrain(parameterSet);

  log << "----- Input Parameters -----: " << std::endl;
//   log << "prestrain imp: " <<  prestraintype << "\nrho1 = " << rho1 << "\nrho2 = " << rho2  << std::endl;
  log << "alpha: " << alpha << std::endl;
  log << "gamma: " << gamma << std::endl;
  log << "theta: " << theta << std::endl;
  log << "beta: " << beta << std::endl;
  log << "material parameters: " << std::endl;
  log << "mu1: " << mu1 << "\nmu2: " << mu2 << std::endl;
  log << "lambda1: " << lambda1 <<"\nlambda2: " << lambda2 << std::endl;
  log << "----------------------------: " << std::endl;

  ///////////////////////////////////
  // Generate the grid
  ///////////////////////////////////

  //Corrector Problem Domain:
  unsigned int cellDomain = parameterSet.get<unsigned int>("cellDomain", 1);
  // (shifted) Domain (-1/2,1/2)^3
  FieldVector<double,dim> lower({-1.0/2.0, -1.0/2.0, -1.0/2.0});
  FieldVector<double,dim> upper({1.0/2.0, 1.0/2.0, 1.0/2.0});
  if (cellDomain == 2)
  {
    // Domain : [0,1)^2 x (-1/2, 1/2)
    FieldVector<double,dim> lower({0.0, 0.0, -1.0/2.0});
    FieldVector<double,dim> upper({1.0, 1.0, 1.0/2.0});
  }

  std::array<int,2> numLevels = parameterSet.get<std::array<int,2>>("numLevels", {1,3});

  int levelCounter = 0;

  ///////////////////////////////////
  // Create Data Storage
  ///////////////////////////////////
  // Storage:: #1 level #2 L2SymError #3 L2SymErrorOrder #4  L2Norm(sym) #5 L2Norm(sym-analytic) #6 L2Norm(phi_1)
  std::vector<std::variant<std::string, size_t , double>> Storage_Error;
  
  // Storage:: #1 level #2 |q1_a-q1_c| #3 |q2_a-q2_c| #4 |q3_a-q3_c| #5 |b1_a-b1_c| #6 |b2_a-b2_c| #7 |b3_a-b3_c|           
   std::vector<std::variant<std::string, size_t , double>> Storage_Quantities;         


//   for(const size_t &level : numLevels)                             // explixite Angabe der levels.. {2,4}
  for(size_t level = numLevels[0] ; level <= numLevels[1]; level++)    // levels von bis.. [2,4]
  {
    std::cout << " ----------------------------------" << std::endl;
    std::cout << "Level: " << level << std::endl;
    std::cout << " ----------------------------------" << std::endl;

    Storage_Error.push_back(level);
    Storage_Quantities.push_back(level);
    std::array<int, dim> nElements = { (int)std::pow(2,level) , (int)std::pow(2,level) , (int)std::pow(2,level) };

    std::cout << "Number of Elements in each direction: " << nElements << std::endl;
    log << "Number of Elements in each direction: " << nElements << std::endl;

    using CellGridType = YaspGrid<dim, EquidistantOffsetCoordinates<double, dim> >;
    CellGridType grid_CE(lower,upper,nElements);
    using GridView = CellGridType::LeafGridView;
    const GridView gridView_CE = grid_CE.leafGridView();

    using MatrixRT = FieldMatrix< double, dimWorld, dimWorld>;
    using Domain = GridView::Codim<0>::Geometry::GlobalCoordinate;
    using Func2Tensor = std::function< MatrixRT(const Domain&) >;
    using VectorCT = BlockVector<FieldVector<double,1> >;
    using MatrixCT = BCRSMatrix<FieldMatrix<double,1,1> >;

    ///////////////////////////////////
    //  Create Lambda-Functions for material Parameters depending on microstructure
    ///////////////////////////////////
    
    auto materialImp = IsotropicMaterialImp<dim>();
    auto muTerm = materialImp.getMu(parameterSet);
    auto lambdaTerm = materialImp.getLambda(parameterSet);

/*  
    auto muTerm = [mu1, mu2, theta] (const Domain& z) {
                    if (abs(z[0]) >= (theta/2.0))                                                   
                        return mu1;
                    else
                        return mu2;
                    };

    auto lambdaTerm = [lambda1,lambda2, theta] (const Domain& z) {
                    if (abs(z[0]) >= (theta/2.0))
                        return lambda1;
                    else
                        return lambda2;
                    };*/

    auto muGridF  = Dune::Functions::makeGridViewFunction(muTerm, gridView_CE);
    auto muLocal = localFunction(muGridF);
    auto lambdaGridF  = Dune::Functions::makeGridViewFunction(lambdaTerm, gridView_CE);
    auto lambdaLocal = localFunction(lambdaGridF);

    ///////////////////////////////////
    // --- Choose Solver ---
    // 1 : CG-Solver
    // 2 : GMRES
    // 3 : QR
    ///////////////////////////////////
    unsigned int Solvertype = parameterSet.get<unsigned int>("Solvertype", 1);
    
    unsigned int Solver_verbosity = parameterSet.get<unsigned int>("Solver_verbosity", 2);
    
    // Print Options 
    bool print_debug = parameterSet.get<bool>("print_debug", false);
    
    
    bool write_materialFunctions = parameterSet.get<bool>("write_materialFunctions", false);
    bool write_prestrainFunctions  = parameterSet.get<bool>("write_prestrainFunctions", false);
    
    
    bool write_corrector_phi1 = parameterSet.get<bool>("write_corrector_phi1", false);
    bool write_corrector_phi2 = parameterSet.get<bool>("write_corrector_phi2", false);
    bool write_corrector_phi3 = parameterSet.get<bool>("write_corrector_phi3", false);
    bool write_L2Error = parameterSet.get<bool>("write_L2Error", false);
    bool write_IntegralMean = parameterSet.get<bool>("write_IntegralMean", false);
    
    bool write_VTK = parameterSet.get<bool>("write_VTK", false);

    /////////////////////////////////////////////////////////
    // Choose a finite element space for Cell Problem
    /////////////////////////////////////////////////////////
    using namespace Functions::BasisFactory;
    Functions::BasisFactory::Experimental::PeriodicIndexSet periodicIndices;

    // Don't do the following in real life: It has quadratic run-time in the number of vertices.
    for (const auto& v1 : vertices(gridView_CE))
        for (const auto& v2 : vertices(gridView_CE))
            if (equivalent(v1.geometry().corner(0), v2.geometry().corner(0)))
            {
                periodicIndices.unifyIndexPair({gridView_CE.indexSet().index(v1)}, {gridView_CE.indexSet().index(v2)});
            }

    // First order periodic Lagrange-Basis
    auto Basis_CE = makeBasis(
        gridView_CE,
        power<dim>(                                                                             // eig dimworld?!?! 
        Functions::BasisFactory::Experimental::periodic(lagrange<1>(), periodicIndices),
        flatLexicographic()));     

    log << "size of FiniteElementBasis: " << Basis_CE.size() << std::endl;
    const int phiOffset = Basis_CE.size();

    /////////////////////////////////////////////////////////
    // Data structures: Stiffness matrix and right hand side vectors
    /////////////////////////////////////////////////////////
    VectorCT load_alpha1, load_alpha2, load_alpha3;
    MatrixCT stiffnessMatrix_CE;

    bool set_IntegralZero = parameterSet.get<bool>("set_IntegralZero", true);
    bool set_oneBasisFunction_Zero = false;
    bool substract_integralMean = false;
    if(set_IntegralZero)
    {
        set_oneBasisFunction_Zero = true;
        substract_integralMean = true;
    }

    /////////////////////////////////////////////////////////
    // Define "rhs"-functions
    /////////////////////////////////////////////////////////
    Func2Tensor x3G_1 = [] (const Domain& x) {
                            return MatrixRT{{1.0*x[2], 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}};    //TODO könnte hier sign übergeben?
                        };

    Func2Tensor x3G_2 = [] (const Domain& x) {
                            return MatrixRT{{0.0, 0.0, 0.0}, {0.0, 1.0*x[2], 0.0}, {0.0, 0.0, 0.0}};
                        };

    Func2Tensor x3G_3 = [] (const Domain& x) {                                                                               
                            return MatrixRT{{0.0, 1.0/sqrt(2.0)*x[2], 0.0}, {1.0/sqrt(2.0)*x[2], 0.0, 0.0}, {0.0, 0.0, 0.0}};
                        };
                        
    Func2Tensor x3G_1neg = [x3G_1] (const Domain& x) {return -1.0*x3G_1(x);};
    Func2Tensor x3G_2neg = [x3G_2] (const Domain& x) {return -1.0*x3G_2(x);};
    Func2Tensor x3G_3neg = [x3G_3] (const Domain& x) {return -1.0*x3G_3(x);};
    
    
    //TEST 
//     auto QuadraticForm = [] (const double mu, const double lambda, const MatrixRT& M) {
// 
//                                 return lambda*std::pow(trace(M),2) + 2*mu*pow(norm(sym(M)),2);
//                         };


    // TEST - energy method ///
    // different indicatorFunction in muTerm has impact here !!
    //     double GGterm = 0.0;
    //     GGterm = energy(Basis_CE, muLocal, lambdaLocal, x3G_1, x3G_1  );   // <L i(x3G_alpha) , i(x3G_beta) >
    //     std::cout << "GGTerm:" << GGterm << std::endl;
    //     std::cout << " analyticGGTERM:" << (mu1*(1-theta)+mu2*theta)/6.0 << std::endl;


    ///////////////////////////////////////////////
    // Basis for R_sym^{2x2}
    //////////////////////////////////////////////
    MatrixRT G_1 {{1.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0, 0.0}};
    MatrixRT G_2 {{0.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0, 0.0, 0.0}};
    MatrixRT G_3 {{0.0, 1.0/sqrt(2.0), 0.0}, {1.0/sqrt(2.0), 0.0, 0.0}, {0.0, 0.0, 0.0}};
    std::array<MatrixRT,3 > basisContainer = {G_1, G_2, G_3};
    //  printmatrix(std::cout, basisContainer[0] , "G_1", "--");
    //  printmatrix(std::cout, basisContainer[1] , "G_2", "--");
    //  printmatrix(std::cout, basisContainer[2] , "´G_3", "--");
    //  log <<  basisContainer[0] << std::endl;


    //////////////////////////////////////////////////////////////////////
    // Determine global indices of components for arbitrary (local) index
    //////////////////////////////////////////////////////////////////////
    int arbitraryLeafIndex =  parameterSet.get<unsigned int>("arbitraryLeafIndex", 0);            // localIdx..assert Number < #ShapeFcts 
    int arbitraryElementNumber =  parameterSet.get<unsigned int>("arbitraryElementNumber", 0);

    FieldVector<int,3> row;
     row = arbitraryComponentwiseIndices(Basis_CE,arbitraryElementNumber,arbitraryLeafIndex);
    if(print_debug)
      printvector(std::cout, row, "row" , "--");

    /////////////////////////////////////////////////////////
    // Assemble the system
    /////////////////////////////////////////////////////////
    assembleCellStiffness(Basis_CE, muLocal, lambdaLocal, gamma,  stiffnessMatrix_CE, parameterSet);
    assembleCellLoad(Basis_CE, muLocal, lambdaLocal,gamma, load_alpha1 ,x3G_1neg);
    assembleCellLoad(Basis_CE, muLocal, lambdaLocal,gamma, load_alpha2 ,x3G_2neg);
    assembleCellLoad(Basis_CE, muLocal, lambdaLocal,gamma, load_alpha3 ,x3G_3neg);
    //   printmatrix(std::cout, stiffnessMatrix_CE, "StiffnessMatrix", "--");
    //   printvector(std::cout, load_alpha1, "load_alpha1", "--");



    // set one basis-function to zero
    if(set_oneBasisFunction_Zero)
    {
        setOneBaseFunctionToZero(Basis_CE, stiffnessMatrix_CE, load_alpha1, load_alpha2, load_alpha3, parameterSet);
    //     printmatrix(std::cout, stiffnessMatrix_CE, "StiffnessMatrix after setOneBasisFunctionToZero", "--");
    //     printvector(std::cout, load_alpha1, "load_alpha1 after setOneBaseFunctionToZero", "--");
    }

    ////////////////////////////////////////////////////
    // Compute solution
    ////////////////////////////////////////////////////
    VectorCT x_1 = load_alpha1;
    VectorCT x_2 = load_alpha2;
    VectorCT x_3 = load_alpha3;

//     auto load_alpha1BS = load_alpha1;
    //   printvector(std::cout, load_alpha1, "load_alpha1 before SOLVER", "--" );
    //   printvector(std::cout, load_alpha2, "load_alpha2 before SOLVER", "--" );

    if (Solvertype == 1)  // CG - SOLVER
    {
        std::cout << "------------ CG - Solver ------------" << std::endl;
        MatrixAdapter<MatrixCT, VectorCT, VectorCT> op(stiffnessMatrix_CE);



        // Sequential incomplete LU decomposition as the preconditioner
        SeqILU<MatrixCT, VectorCT, VectorCT> ilu0(stiffnessMatrix_CE,1.0);
        int iter = parameterSet.get<double>("iterations_CG", 999);
        // Preconditioned conjugate-gradient solver
        CGSolver<VectorCT> solver(op,
                                ilu0, //NULL,
                                1e-8, // desired residual reduction factorlack
                                iter,   // maximum number of iterations
                                Solver_verbosity);   // verbosity of the solver
        InverseOperatorResult statistics;
        std::cout << "solve linear system for x_1.\n";
        solver.apply(x_1, load_alpha1, statistics);
        std::cout << "solve linear system for x_2.\n";
        solver.apply(x_2, load_alpha2, statistics);
        std::cout << "solve linear system for x_3.\n";
        solver.apply(x_3, load_alpha3, statistics);
        log << "Solver-type used: " <<" CG-Solver" << std::endl;
    }
    ////////////////////////////////////////////////////////////////////////////////////

    else if (Solvertype ==2)  // GMRES - SOLVER
    {

        std::cout << "------------ GMRES - Solver ------------" << std::endl;
        // Turn the matrix into a linear operator
        MatrixAdapter<MatrixCT,VectorCT,VectorCT> stiffnessOperator(stiffnessMatrix_CE);

        // Fancy (but only) way to not have a preconditioner at all
        Richardson<VectorCT,VectorCT> preconditioner(1.0);

        // Construct the iterative solver
        RestartedGMResSolver<VectorCT> solver(
            stiffnessOperator, // Operator to invert
            preconditioner,    // Preconditioner
            1e-10,             // Desired residual reduction factor
            500,               // Number of iterations between restarts,
                            // here: no restarting
            500,               // Maximum number of iterations
            Solver_verbosity);                // Verbosity of the solver

        // Object storing some statistics about the solving process
        InverseOperatorResult statistics;

        // solve for different Correctors (alpha = 1,2,3)
        solver.apply(x_1, load_alpha1, statistics);
        solver.apply(x_2, load_alpha2, statistics);
        solver.apply(x_3, load_alpha3, statistics);
        log << "Solver-type used: " <<" GMRES-Solver" << std::endl;
    }
    ////////////////////////////////////////////////////////////////////////////////////
    else if ( Solvertype == 3)// QR - SOLVER
    {

        std::cout << "------------ QR - Solver ------------" << std::endl;
        log << "solveLinearSystems: We use QR solver.\n";
        //TODO install suitesparse
        SPQR<MatrixCT> sPQR(stiffnessMatrix_CE);
        sPQR.setVerbosity(1);
        InverseOperatorResult statisticsQR;

        sPQR.apply(x_1, load_alpha1, statisticsQR);
        sPQR.apply(x_2, load_alpha2, statisticsQR);
        sPQR.apply(x_3, load_alpha3, statisticsQR);
        log << "Solver-type used: " <<" QR-Solver" << std::endl;
    }
    //     printvector(std::cout, load_alpha1BS, "load_alpha1 before SOLVER", "--" );
    //     printvector(std::cout, load_alpha1, "load_alpha1 AFTER SOLVER", "--" );
    //     printvector(std::cout, load_alpha2, "load_alpha2 AFTER SOLVER", "--" );

    ////////////////////////////////////////////////////////////////////////////////////
    // Extract phi_alpha  &  M_alpha coefficients
    ////////////////////////////////////////////////////////////////////////////////////
    VectorCT phi_1, phi_2, phi_3;
    phi_1.resize(Basis_CE.size());
    phi_1 = 0;
    phi_2.resize(Basis_CE.size());
    phi_2 = 0;
    phi_3.resize(Basis_CE.size());
    phi_3 = 0;

    for(size_t i=0; i<Basis_CE.size(); i++)
    {
        phi_1[i] = x_1[i];
        phi_2[i] = x_2[i];
        phi_3[i] = x_3[i];
    }

    FieldVector<double,3> m_1, m_2, m_3;

    for(size_t i=0; i<3; i++)
    {
        m_1[i] = x_1[phiOffset+i];
        m_2[i] = x_2[phiOffset+i];
        m_3[i] = x_3[phiOffset+i];
    }
    // assemble M_alpha's (actually iota(M_alpha) )

    MatrixRT M_1(0), M_2(0), M_3(0);

    for(size_t i=0; i<3; i++)
    {
        M_1 += m_1[i]*basisContainer[i];
        M_2 += m_2[i]*basisContainer[i];
        M_3 += m_3[i]*basisContainer[i];
    }

    std::cout << "--- plot corrector-Matrices M_alpha --- " << std::endl;
    printmatrix(std::cout, M_1, "Corrector-Matrix M_1", "--");
    printmatrix(std::cout, M_2, "Corrector-Matrix M_2", "--");
    printmatrix(std::cout, M_3, "Corrector-Matrix M_3", "--");
    log << "---------- OUTPUT ----------" << std::endl;
    log << " --------------------" << std::endl;
    log << "Corrector-Matrix M_1: \n" << M_1 << std::endl;
    log << " --------------------" << std::endl;
    log << "Corrector-Matrix M_2: \n" << M_2 << std::endl;
    log << " --------------------" << std::endl;
    log << "Corrector-Matrix M_3: \n" << M_3 << std::endl;
    log << " --------------------" << std::endl;

    ////////////////////////////////////////////////////////////////////////////
    // Substract Integral-mean
    ////////////////////////////////////////////////////////////////////////////                                                                         
    using SolutionRange = FieldVector<double,dim>;

    if(write_IntegralMean)
    {
        std::cout << "check integralMean phi_1: " << std::endl;
        auto A = integralMean(Basis_CE,phi_1);
        for(size_t i=0; i<3; i++)
        {
            std::cout << "Integral-Mean : " << A[i] << std::endl;
        }
    }
    if(substract_integralMean)
    {
        std::cout << " --- substracting integralMean --- " << std::endl;
        subtractIntegralMean(Basis_CE,  phi_1);
        subtractIntegralMean(Basis_CE,  phi_2);
        subtractIntegralMean(Basis_CE,  phi_3);
        subtractIntegralMean(Basis_CE,  x_1);
        subtractIntegralMean(Basis_CE,  x_2);
        subtractIntegralMean(Basis_CE,  x_3);
        //////////////////////////////////////////
        // CHECK INTEGRAL-MEAN:
        //////////////////////////////////////////
        if(write_IntegralMean)
        {
            auto A = integralMean(Basis_CE, phi_1);
            for(size_t i=0; i<3; i++)
            {
            std::cout << "Integral-Mean (CHECK)  : " << A[i] << std::endl;
            }
        }
    }

    /////////////////////////////////////////////////////////
    // Write Solution (Corrector Coefficients) in Logs
    /////////////////////////////////////////////////////////
//     log << "\nSolution of Corrector problems:\n";
    if(write_corrector_phi1)
    {
        log << "\nSolution of Corrector problems:\n";
        log << "\n Corrector_phi1:\n";
        log << x_1 << std::endl;
    }
    if(write_corrector_phi2)
    {
        log << "-----------------------------------------------------";
        log << "\n Corrector_phi2:\n";
        log << x_2 << std::endl;
    }
    if(write_corrector_phi3)
    {
        log << "-----------------------------------------------------";
        log << "\n Corrector_phi3:\n";
        log << x_3 << std::endl;
    }

    ////////////////////////////////////////////////////////////////////////////
    // Make a discrete function from the FE basis and the coefficient vector
    ////////////////////////////////////////////////////////////////////////////                                                                                     // ERROR
    auto correctorFunction_1 = Functions::makeDiscreteGlobalBasisFunction<SolutionRange>(
        Basis_CE,
        phi_1);

    auto correctorFunction_2 = Functions::makeDiscreteGlobalBasisFunction<SolutionRange>(
        Basis_CE,
        phi_2);

    auto correctorFunction_3 = Functions::makeDiscreteGlobalBasisFunction<SolutionRange>(
        Basis_CE,
        phi_3);

    /////////////////////////////////////////////////////////
    // Create Containers for Basis-Matrices, Correctors and Coefficients
    /////////////////////////////////////////////////////////
    const std::array<Func2Tensor, 3> x3MatrixBasis = {x3G_1, x3G_2, x3G_3};
    const std::array<VectorCT, 3> coeffContainer = {x_1, x_2, x_3};
    const std::array<VectorCT, 3> loadContainer = {load_alpha1, load_alpha2, load_alpha3};
    const std::array<MatrixRT*, 3 > mContainer = {&M_1 , &M_2, & M_3};


    /////////////////////////////////////////////////////////
    // Compute effective quantities: Elastic law & Prestrain
    /////////////////////////////////////////////////////////
    MatrixRT Q(0);
    VectorCT tmp1,tmp2;
    FieldVector<double,3> B_hat ;

    // Compute effective elastic law Q
    for(size_t a = 0; a < 3; a++)
        for(size_t b=0; b < 3; b++)
        {
            assembleCellLoad(Basis_CE, muLocal, lambdaLocal, gamma, tmp1 ,x3MatrixBasis[b]);   // <L i(M_alpha) + sym(grad phi_alpha), i(x3G_beta) >

            double GGterm = 0.0;
            GGterm = energy(Basis_CE, muLocal, lambdaLocal, x3MatrixBasis[a] , x3MatrixBasis[b]  );   // <L i(x3G_alpha) , i(x3G_beta) >

            // TEST
            // std::setprecision(std::numeric_limits<float>::digits10);

            Q[a][b] =  (coeffContainer[a]*tmp1) + GGterm;                         // seems symmetric...check positiv definitness?
        
            if (print_debug)
            {
                std::cout << "analyticGGTERM:" << (mu1*(1-theta)+mu2*theta)/6.0 << std::endl;
                std::cout << "GGTerm:" << GGterm << std::endl;
                std::cout << "coeff*tmp: " << coeffContainer[a]*tmp1 << std::endl;
            }
        }
    printmatrix(std::cout, Q, "Matrix Q", "--");
    log << "Effective Matrix Q: " << std::endl;
    log << Q << std::endl;

    // compute B_hat
    for(size_t a = 0; a<3; a++)
    {
        assembleCellLoad(Basis_CE, muLocal, lambdaLocal, gamma, tmp2 ,B_Term);  // <L i(M_alpha) + sym(grad phi_alpha), B >
        auto GBterm = energy(Basis_CE, muLocal, lambdaLocal, x3MatrixBasis[a] , B_Term); // <L i(x3G_alpha) , B>
        B_hat[a] = (coeffContainer[a]*tmp2) + GBterm;
        
        if (print_debug)
        {
            std::cout << "check this Contribution: " << (coeffContainer[a]*tmp2) << std::endl;  //see orthotropic.tex
        }
    }

//     log << B_hat << std::endl;
//     log << "Prestrain B_hat: " << std::endl;
//     log << B_hat << std::endl;

    std::cout << "check this Contribution: " << (coeffContainer[2]*tmp2) << std::endl;  //see orthotropic.tex

    ///////////////////////////////
    // Compute effective Prestrain B_eff
    //////////////////////////////
    FieldVector<double, 3> Beff;
    Q.solve(Beff,B_hat);
    
    log << "--- Prestrain Output --- " << std::endl;
    log << "B_hat: " << B_hat << std::endl;
    log << "B_eff: " << Beff <<  " (Effective Prestrain)" << std::endl;
    log << "------------------------ " << std::endl;
//     log << Beff << std::endl;
//     log << "Effective Prestrain B_eff: " << std::endl;
//     log << Beff << std::endl;
//     printvector(std::cout, Beff, "Beff", "--");

    auto q1 = Q[0][0];
    auto q2 = Q[1][1];
    auto q3 = Q[2][2];
    
    std::cout<< "q1 : " << q1 << std::endl;
    std::cout<< "q2 : " << q2 << std::endl;
    std::cout.precision(10);
    std::cout << "q3 : " << std::fixed << q3 << std::endl;
//     std::cout<< "q3 : " << q3 << std::endl;
    std::cout<< std::fixed << std::setprecision(6) << "q_onetwo=" << Q[0][1] << std::endl;
//     std::cout<< std::fixed << std::setprecision(6) << "q_onetwo=" << Q[1][0] << std::endl; //TEST 
    printvector(std::cout, B_hat, "B_hat", "--");
    printvector(std::cout, Beff, "Beff", "--");
    
    std::cout << "Theta: " << theta << std::endl;
    std::cout << "Gamma: " << gamma << std::endl;
    std::cout << "Number of Elements in each direction: " << nElements << std::endl;
    log << "q1=" << q1 << std::endl;
    log << "q2=" << q2 << std::endl;
    log << "q3=" << q3 << std::endl;
    log << "q12=" << Q[0][1] << std::endl;
    
    log << "b1=" << Beff[0] << std::endl;
    log << "b2=" << Beff[1] << std::endl;
    log << "b3=" << Beff[2] << std::endl;
    log << "b1_hat: " << B_hat[0] << std::endl;
    log << "b2_hat: " << B_hat[1] << std::endl;
    log << "b3_hat: " << B_hat[2] << std::endl;
    log << "mu_gamma=" << q3 << std::endl;           // added for Python-Script
 
    log << std::fixed << std::setprecision(6) << "q_onetwo=" << Q[0][1] << std::endl;
//     log << "q_onetwo=" << Q[0][1] << std::endl;           // added for Python-Script

    //////////////////////////////////////////////////////////////
    // Define Analytic Solutions
    //////////////////////////////////////////////////////////////

    
    
    
    
    
    std::cout << "Test B_hat & B_eff" << std::endl;
     double p1 = parameterSet.get<double>("rho1", 1.0);
     double alpha = parameterSet.get<double>("alpha", 2.0);
     double p2 = alpha*p1;
     
    if (imp == "parametrized_Laminate" && lambda1==0 )  // only in this case we know an analytical solution
    {
        double rho1 = parameterSet.get<double>("rho1", 1.0);
//         double b1_hat_ana = (-(theta/4.0)*mu1*mu2)/(theta*mu1+(1.0-theta)*mu2);
//         double b2_hat_ana = -(theta/4.0)*mu2;
//         double b3_hat_ana = 0.0;
    
        double b1_eff_ana = (3.0/2.0)*rho1*(1-theta*(1+alpha));
        double b2_eff_ana = (3.0/2.0)*rho1*((1-theta*(1+beta*alpha))/(1-theta+theta*beta));
        double b3_eff_ana = 0.0;
        
    //     double q1_ana = ((mu1*mu2)/6.0)/(theta*mu1+ (1.0- theta)*mu2);
    //     double q2_ana = ((1.0-theta)*mu1+theta*mu2)/6.0;
        double q1_ana = mu1*(beta/(theta+(1-theta)*beta)) *(1.0/6.0);  // 1/6 * harmonicMean
        double q2_ana = mu1*((1-theta)+theta*beta)        *(1.0/6.0);     // 1/6 * arithmeticMean

        std::cout << "----- print analytic solutions -----" << std::endl;
//         std::cout << "b1_hat_ana : " << b1_hat_ana << std::endl;
//         std::cout << "b2_hat_ana : " << b2_hat_ana << std::endl;
//         std::cout << "b3_hat_ana : " << b3_hat_ana << std::endl;
        std::cout << "b1_eff_ana : " << b1_eff_ana << std::endl;
        std::cout << "b2_eff_ana : " << b2_eff_ana << std::endl;
        std::cout << "b3_eff_ana : " << b3_eff_ana << std::endl;
        
        std::cout << "q1_ana : "     << q1_ana << std::endl;
        std::cout << "q2_ana : "     << q2_ana << std::endl;
        std::cout << "q3 should be between q1 and q2"  << std::endl;
        log << "----- print analytic solutions -----" << std::endl;
//         log << "b1_hat_ana : " << b1_hat_ana << std::endl;
//         log << "b2_hat_ana : " << b2_hat_ana << std::endl;
//         log << "b3_hat_ana : " << b3_hat_ana << std::endl;
        log << "b1_eff_ana : " << b1_eff_ana << std::endl;
        log << "b2_eff_ana : " << b2_eff_ana << std::endl;
        log << "b3_eff_ana : " << b3_eff_ana << std::endl;
        log << "q1_ana : "     << q1_ana << std::endl;
        log << "q2_ana : "     << q2_ana << std::endl;
        log << "q3 should be between q1 and q2"  << std::endl;
        
        Storage_Quantities.push_back(std::abs(q1_ana - q1));
        Storage_Quantities.push_back(std::abs(q2_ana - q2));
        Storage_Quantities.push_back(q3);
        Storage_Quantities.push_back(std::abs(b1_eff_ana - Beff[0]));
        Storage_Quantities.push_back(std::abs(b2_eff_ana - Beff[1]));
        Storage_Quantities.push_back(std::abs(b3_eff_ana - Beff[2]));
    }
    else if (imp == "analytical_Example")   // print Errors only for analytical_Example
    {
        std::cout << ((3.0*p1)/2.0)*beta*(1-(theta*(1+alpha)))   << std::endl;  // TODO ERROR in paper ?? 
        
        // double b1 = (mu1*p1/4)*(beta/(theta+(1-theta)*beta))*(1-theta*(1+alpha));
        // double b2 = (mu1*p1/8)*(1-theta*(1+beta*alpha));
        double b1_hat_ana = (-(theta/4.0)*mu1*mu2)/(theta*mu1+(1.0-theta)*mu2);
        double b2_hat_ana = -(theta/4.0)*mu2;
        double b3_hat_ana = 0.0;
    
        double b1_eff_ana = (-3.0/2.0)*theta;
        double b2_eff_ana = (-3.0/2.0)*(theta*mu2)/(mu1*(1-theta)+mu2*theta);
        double b3_eff_ana = 0.0;
        
    //     double q1_ana = ((mu1*mu2)/6.0)/(theta*mu1+ (1.0- theta)*mu2);
    //     double q2_ana = ((1.0-theta)*mu1+theta*mu2)/6.0;
        double q1_ana = mu1*(beta/(theta+(1-theta)*beta)) *(1.0/6.0);  // 1/6 * harmonicMean
        double q2_ana = mu1*((1-theta)+theta*beta)        *(1.0/6.0);     // 1/6 * arithmeticMean

        std::cout << "----- print analytic solutions -----" << std::endl;
        std::cout << "b1_hat_ana : " << b1_hat_ana << std::endl;
        std::cout << "b2_hat_ana : " << b2_hat_ana << std::endl;
        std::cout << "b3_hat_ana : " << b3_hat_ana << std::endl;
        std::cout << "b1_eff_ana : " << b1_eff_ana << std::endl;
        std::cout << "b2_eff_ana : " << b2_eff_ana << std::endl;
        std::cout << "b3_eff_ana : " << b3_eff_ana << std::endl;
        
        std::cout << "q1_ana : "     << q1_ana << std::endl;
        std::cout << "q2_ana : "     << q2_ana << std::endl;
        std::cout << "q3 should be between q1 and q2"  << std::endl;
        log << "----- print analytic solutions -----" << std::endl;
        log << "b1_hat_ana : " << b1_hat_ana << std::endl;
        log << "b2_hat_ana : " << b2_hat_ana << std::endl;
        log << "b3_hat_ana : " << b3_hat_ana << std::endl;
        log << "b1_eff_ana : " << b1_eff_ana << std::endl;
        log << "b2_eff_ana : " << b2_eff_ana << std::endl;
        log << "b3_eff_ana : " << b3_eff_ana << std::endl;
        log << "q1_ana : "     << q1_ana << std::endl;
        log << "q2_ana : "     << q2_ana << std::endl;
        log << "q3 should be between q1 and q2"  << std::endl;
        
        Storage_Quantities.push_back(std::abs(q1_ana - q1));
        Storage_Quantities.push_back(std::abs(q2_ana - q2));
        Storage_Quantities.push_back(q3);
        Storage_Quantities.push_back(std::abs(b1_eff_ana - Beff[0]));
        Storage_Quantities.push_back(std::abs(b2_eff_ana - Beff[1]));
        Storage_Quantities.push_back(std::abs(b3_eff_ana - Beff[2]));
    }
    else
    {
        Storage_Quantities.push_back(q1);
        Storage_Quantities.push_back(q2);
        Storage_Quantities.push_back(q3);
        Storage_Quantities.push_back(Beff[0]);
        Storage_Quantities.push_back(Beff[1]);
        Storage_Quantities.push_back(Beff[2]);
    }


    auto symPhi_1_analytic = [mu1, mu2, theta, muTerm] (const Domain& x) {
                        return MatrixRT{{  (((mu1*mu2)/((theta*mu1 +(1-theta)*mu2)*muTerm(x))) - 1)*x[2] , 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}};
                    };

    auto zeroFunction = [] (const Domain& x) {
                    return MatrixRT{{0.0 , 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}};
                    };

    if(write_L2Error)
    {
//         std::cout << " ----- L2ErrorSym ----" << std::endl;
        auto L2SymError = computeL2SymError(Basis_CE,phi_1,gamma,symPhi_1_analytic);
//         std::cout << "L2SymError: " << L2SymError << std::endl;
//         std::cout << " -----------------" << std::endl;

//         std::cout << " ----- L2NORMSym ----" << std::endl;
        auto L2Norm_Symphi = computeL2SymError(Basis_CE,phi_1,gamma,zeroFunction);                           
//         std::cout << "L2-Norm(Symphi_1): " << L2Norm_Symphi << std::endl;           
        VectorCT zeroVec;
        zeroVec.resize(Basis_CE.size());
        zeroVec = 0;
        auto L2Norm_SymAnalytic = computeL2SymError(Basis_CE,zeroVec ,gamma, symPhi_1_analytic);
//         std::cout << "L2-Norm(SymAnalytic): " << L2Norm_SymAnalytic << std::endl;
//         std::cout << " -----------------" << std::endl;

//         std::cout << " ----- L2NORM ----" << std::endl;
        auto L2Norm = computeL2Norm(Basis_CE,phi_1);
//         std::cout << "L2Norm(phi_1): "  << L2Norm << std::endl;
//         std::cout << " -----------------" << std::endl;
        
        
        
//         log << "L2-Error (symmetric Gradient phi_1):" << L2SymError << std::endl;
//         log << "L2-Norm(Symphi_1): "    << L2Norm_Symphi<< std::endl;
//         log << "L2-Norm(SymAnalytic): " << L2Norm_SymAnalytic << std::endl;

        double EOC = 0.0;
        Storage_Error.push_back(L2SymError);

        //Compute Experimental order of convergence (EOC)
        if(levelCounter > 0)
        {
            // Storage_Error:: #1 level #2 L2SymError #3 L2SymErrorOrder #4  L2Norm(sym) #5 L2Norm(sym-analytic) #6 L2Norm(phi_1)
//             std::cout << " ((levelCounter-1)*6)+1: " << ((levelCounter-1)*6)+1 << std::endl;           // Besser std::map ???
//             std::cout << " ((levelCounter-1)*6)+1: " << ((levelCounter)*6)+1 << std::endl;             // für Storage_Error[idx] muss idx zur compile time feststehen?!
            auto ErrorOld = std::get<double>(Storage_Error[((levelCounter-1)*6)+1]);
            auto ErrorNew = std::get<double>(Storage_Error[(levelCounter*6)+1]);
//
            EOC = std::log(ErrorNew/ErrorOld)/(-1*std::log(2));
            //   std::cout << "Storage_Error[0] :" << std::get<1>(Storage_Error[0]) << std::endl;
            //   std::cout << "output variant :" << std::get<std::string>(Storage_Error[1]) << std::endl;
            //   std::cout << "output variant :" << std::get<0>(Storage_Error[1]) << std::endl;
        }
//         Storage_Error.push_back(level);
        Storage_Error.push_back(EOC);
        Storage_Error.push_back(L2Norm_Symphi);
        Storage_Error.push_back(L2Norm_SymAnalytic);
        Storage_Error.push_back(L2Norm);
    }
  //////////////////////////////////////////////////////////////////////////////////////////////
  // Write Data to Matlab / Optimization-Code
  //////////////////////////////////////////////////////////////////////////////////////////////
//   writeMatrixToMatlab(Q, "../../Matlab-Programs/QMatrix.txt");
  writeMatrixToMatlab(Q, outputPath + "/QMatrix.txt");
  
  // write effective Prestrain in Matrix for Output
  FieldMatrix<double,1,3> BeffMat;
  BeffMat[0] = Beff;
//   writeMatrixToMatlab(BeffMat, "../../Matlab-Programs/BMatrix.txt");
  writeMatrixToMatlab(BeffMat, outputPath + "/BMatrix.txt");
  
  //////////////////////////////////////////////////////////////////////////////////////////////
  // Write result to VTK file
  //////////////////////////////////////////////////////////////////////////////////////////////
  if(write_VTK) 
  {
        std::string vtkOutputName = outputPath + "/CellProblem-result";
        
        std::cout << "vtkOutputName:" << vtkOutputName << std::endl;
        
        VTKWriter<GridView> vtkWriter(gridView_CE);

        vtkWriter.addVertexData(
            correctorFunction_1,
            VTK::FieldInfo("Corrector phi_1 level"+ std::to_string(level) , VTK::FieldInfo::Type::vector, dim));     
        vtkWriter.addVertexData(
            correctorFunction_2,
            VTK::FieldInfo("Corrector phi_2 level"+ std::to_string(level) , VTK::FieldInfo::Type::vector, dim));
        vtkWriter.addVertexData(
            correctorFunction_3,
            VTK::FieldInfo("Corrector phi_3 level"+ std::to_string(level) , VTK::FieldInfo::Type::vector, dim));
        //   vtkWriter.write( vtkOutputName );
        vtkWriter.write(vtkOutputName  + "-level"+ std::to_string(level));
        //     vtkWriter.pwrite( vtkOutputName  + "-level"+ std::to_string(level), outputPath, "");   // TEST Write to folder "/outputs" 
        //   vtkWriter.pwrite( vtkOutputName  + "-level"+ std::to_string(level), outputPath, "", VTK::OutputType::ascii, 0, 0 );
        std::cout << "wrote data to file: " + vtkOutputName + "-level" + std::to_string(level) << std::endl;      
  }
  
  
   if (write_materialFunctions)
   {
        using VTKGridType = YaspGrid<dim, EquidistantOffsetCoordinates<double, dim> >;
//         VTKGridType grid_VTK({-1.0/2.0, -1.0/2.0, -1.0/2.0},{1.0/2.0, 1.0/2.0, 1.0/2.0},{80,80,80});
        VTKGridType grid_VTK({-1.0/2.0, -1.0/2.0, -1.0/2.0},{1.0/2.0, 1.0/2.0, 1.0/2.0},nElements);
        
        using GridViewVTK = VTKGridType::LeafGridView;
        const GridViewVTK gridView_VTK = grid_VTK.leafGridView();
        
        auto scalarP0FeBasis = makeBasis(gridView_VTK,lagrange<0>());
        auto scalarP1FeBasis = makeBasis(gridView_VTK,lagrange<1>());

        std::vector<double> mu_CoeffP0;
        Functions::interpolate(scalarP0FeBasis, mu_CoeffP0, muTerm);
        auto mu_DGBF_P0 = Functions::makeDiscreteGlobalBasisFunction<double>(scalarP0FeBasis, mu_CoeffP0);
        
        std::vector<double> mu_CoeffP1;
        Functions::interpolate(scalarP1FeBasis, mu_CoeffP1, muTerm);
        auto mu_DGBF_P1 = Functions::makeDiscreteGlobalBasisFunction<double>(scalarP1FeBasis, mu_CoeffP1);
        
        
        std::vector<double> lambda_CoeffP0;
        Functions::interpolate(scalarP0FeBasis, lambda_CoeffP0, lambdaTerm);
        auto lambda_DGBF_P0 = Functions::makeDiscreteGlobalBasisFunction<double>(scalarP0FeBasis, lambda_CoeffP0);
        
        std::vector<double> lambda_CoeffP1;
        Functions::interpolate(scalarP1FeBasis, lambda_CoeffP1, lambdaTerm);
        auto lambda_DGBF_P1 = Functions::makeDiscreteGlobalBasisFunction<double>(scalarP1FeBasis, lambda_CoeffP1);
        
        VTKWriter<GridView> MaterialVtkWriter(gridView_VTK);
        
        MaterialVtkWriter.addVertexData(
            mu_DGBF_P1,
            VTK::FieldInfo("mu_P1", VTK::FieldInfo::Type::scalar, 1));    
        MaterialVtkWriter.addCellData(
            mu_DGBF_P0,
            VTK::FieldInfo("mu_P0", VTK::FieldInfo::Type::scalar, 1));    
        MaterialVtkWriter.addVertexData(
            lambda_DGBF_P1,
            VTK::FieldInfo("lambda_P1", VTK::FieldInfo::Type::scalar, 1));    
        MaterialVtkWriter.addCellData(
            lambda_DGBF_P0,
            VTK::FieldInfo("lambda_P0", VTK::FieldInfo::Type::scalar, 1));    
        
        MaterialVtkWriter.write(outputPath + "/MaterialFunctions-level"+ std::to_string(level) );
        std::cout << "wrote data to file:" + outputPath +"/MaterialFunctions-level" + std::to_string(level) << std::endl;  
        
   }
  
   if (write_prestrainFunctions)
   {
    using VTKGridType = YaspGrid<dim, EquidistantOffsetCoordinates<double, dim> >;
    VTKGridType grid_VTK({-1.0/2.0, -1.0/2.0, -1.0/2.0},{1.0/2.0, 1.0/2.0, 1.0/2.0},{80,80,80});
    using GridViewVTK = VTKGridType::LeafGridView;
    const GridViewVTK gridView_VTK = grid_VTK.leafGridView();
   
    FTKfillerContainer<dim> VTKFiller;
    VTKFiller.vtkPrestrainNorm(gridView_VTK, B_Term, "PrestrainBNorm");
    
    // WORKS Too 
    VTKFiller.vtkProblemCell(gridView_VTK, B_Term, muLocal,"VTKProblemCell");;
    
    
    // TEST 
    auto scalarP0FeBasis = makeBasis(gridView_VTK,lagrange<0>());
    auto scalarP1FeBasis = makeBasis(gridView_VTK,lagrange<1>());

    std::vector<double> B_CoeffP0;
    Functions::interpolate(scalarP0FeBasis, B_CoeffP0, B_Term);
    auto B_DGBF_P0 = Functions::makeDiscreteGlobalBasisFunction<double>(scalarP0FeBasis, B_CoeffP0);
        

        
    VTKWriter<GridView> PrestrainVtkWriter(gridView_VTK);
         
    PrestrainVtkWriter.addCellData(
            B_DGBF_P0,
            VTK::FieldInfo("B_P0", VTK::FieldInfo::Type::scalar, 1));     
        
    PrestrainVtkWriter.write(outputPath + "/PrestrainFunctions-level"+ std::to_string(level) );
    std::cout << "wrote data to file:" + outputPath +"/PrestrainFunctions-level" + std::to_string(level) << std::endl; 

   }
  
  
  levelCounter++; 
  } // Level-Loop End
  
    
  

    /////////////////////////////////////////
    // Print Storage
    /////////////////////////////////////////
    int tableWidth = 12;
    if (imp == "analytical_Example")   // print Errors only for analytical_Example
    {
        std::cout << std::string(tableWidth*7 + 3*7, '-') << "\n";
        std::cout << center("Levels",tableWidth)       << " | "
                << center("L2SymError",tableWidth)     << " | "
                << center("Order",tableWidth)     << " | "
                << center("L2SymNorm",tableWidth)     << " | "
                << center("L2SymNorm_ana",tableWidth)     << " | "
                << center("L2Norm",tableWidth)  << " | " << "\n";
        std::cout << std::string(tableWidth*6 + 3*6, '-') << "\n";
        log     << std::string(tableWidth*6 + 3*6, '-') << "\n";
        log     << center("Levels",tableWidth)       << " | "
                << center("L2SymError",tableWidth)     << " | "
                << center("Order",tableWidth)     << " | "
                << center("L2SymNorm",tableWidth)     << " | "
                << center("L2SNorm_ana",tableWidth)     << " | "
                << center("L2Norm",tableWidth)  << " | " << "\n";
        log     << std::string(tableWidth*6 + 3*6, '-') << "\n";
        
        int StorageCount = 0;

        for(auto& v: Storage_Error) 
        {
            std::visit([tableWidth](auto&& arg){std::cout << center(prd(arg,5,1),tableWidth)      << " | ";}, v);      // Anzahl-Nachkommastellen
            std::visit([tableWidth, &log](auto&& arg){log << center(prd(arg,5,1),tableWidth)      << " & ";}, v);
            StorageCount++;
            if(StorageCount % 6 == 0 )
            {
                std::cout << std::endl;
                log << std::endl;
            }
        }
    }
    
    //////////////// OUTPUT QUANTITIES TABLE //////////////
    if (imp == "analytical_Example" || (imp == "parametrized_Laminate" && lambda1==0 ) )   // print Errors only for analytical_Example
    {
    std::cout << std::string(tableWidth*7 + 3*7, '-') << "\n";
    std::cout << center("Levels ",tableWidth)       << " | "
              << center("|q1_ana-q1|",tableWidth)       << " | "
              << center("|q2_ana-q2|",tableWidth)       << " | "
              << center("q3",tableWidth)           << " | "
              << center("|b1_ana-b1|",tableWidth)       << " | "
              << center("|b2_ana-b2|",tableWidth)       << " | "
              << center("|b3_ana-b3|",tableWidth)       << " | " << "\n";
    std::cout << std::string(tableWidth*7 + 3*7, '-') << "\n";
    log       << std::string(tableWidth*7 + 3*7, '-') << "\n";
    log       << center("Levels ",tableWidth)       << " | "
              << center("|q1_ana-q1|",tableWidth)       << " | "
              << center("|q2_ana-q2|",tableWidth)       << " | "
              << center("q3",tableWidth)           << " | "
              << center("|b1_ana-b1|",tableWidth)       << " | "
              << center("|b2_ana-b2|",tableWidth)       << " | "
              << center("|b3_ana-b3|",tableWidth)       << " | " << "\n";
    log       << std::string(tableWidth*7 + 3*7, '-') << "\n";
    }
    else
    {
    std::cout << center("Levels ",tableWidth)       << " | "
              << center("q1",tableWidth)       << " | "
              << center("q2",tableWidth)       << " | "
              << center("q3",tableWidth)           << " | "
              << center("b1",tableWidth)       << " | "
              << center("b2",tableWidth)       << " | "
              << center("b3",tableWidth)       << " | " << "\n";
    std::cout << std::string(tableWidth*7 + 3*7, '-') << "\n";
    log       << std::string(tableWidth*7 + 3*7, '-') << "\n";   
    log       << center("Levels ",tableWidth)       << " | "
              << center("q1",tableWidth)       << " | "
              << center("q2",tableWidth)       << " | "
              << center("q3",tableWidth)           << " | "
              << center("b1",tableWidth)       << " | "
              << center("b2",tableWidth)       << " | "
              << center("b3",tableWidth)       << " | " << "\n";
    log       << std::string(tableWidth*7 + 3*7, '-') << "\n";   
    }
    
    int StorageCount2 = 0;
    for(auto& v: Storage_Quantities) 
    {
        std::visit([tableWidth](auto&& arg){std::cout << center(prd(arg,5,1),tableWidth)      << " | ";}, v);
        std::visit([tableWidth, &log](auto&& arg){log << center(prd(arg,5,1),tableWidth)      << " & ";}, v);
        StorageCount2++;
        if(StorageCount2 % 7 == 0 )
        {
            std::cout << std::endl;
            log << std::endl;
        }
    }
    std::cout << std::string(tableWidth*7 + 3*7, '-') << "\n";
    log       << std::string(tableWidth*7 + 3*7, '-') << "\n";  

    log.close();
    
//     std::cout << "Total time elapsed: " << globalTimer.elapsed() << std::endl;
}