#ifndef DUNE_GFE_MIXEDLOCALGFEADOLCSTIFFNESS_HH #define DUNE_GFE_MIXEDLOCALGFEADOLCSTIFFNESS_HH #include <adolc/adouble.h> // use of active doubles #include <adolc/drivers/drivers.h> // use of "Easy to Use" drivers // gradient(.) and hessian(.) #include <adolc/interfaces.h> // use of "Easy to Use" drivers #include <adolc/taping.h> // use of taping #include <dune/gfe/adolcnamespaceinjections.hh> #include <dune/common/fmatrix.hh> #include <dune/istl/matrix.hh> #include <dune/gfe/mixedlocalgeodesicfestiffness.hh> #define ADOLC_VECTOR_MODE /** \brief Assembles energy gradient and Hessian with ADOL-C (automatic differentiation) */ template<class Basis0, class TargetSpace0, class Basis1, class TargetSpace1> class MixedLocalGFEADOLCStiffness : public MixedLocalGeodesicFEStiffness<Basis0,TargetSpace0, Basis1,TargetSpace1> { static_assert(std::is_same<typename Basis0::GridView, typename Basis1::GridView>::value, "Basis0 and Basis1 must be designed on the same GridView!"); // grid types typedef typename Basis0::GridView GridView; typedef typename Basis0::LocalView::Tree::FiniteElement LocalFiniteElement0; typedef typename Basis1::LocalView::Tree::FiniteElement LocalFiniteElement1; typedef typename GridView::Grid::ctype DT; typedef typename TargetSpace0::ctype RT; typedef typename GridView::template Codim<0>::Entity Entity; // The 'active' target spaces, i.e., the number type is replaced by adouble typedef typename TargetSpace0::template rebind<adouble>::other ATargetSpace0; typedef typename TargetSpace1::template rebind<adouble>::other ATargetSpace1; // some other sizes enum {gridDim=GridView::dimension}; public: //! Dimension of a tangent space enum { blocksize0 = TargetSpace0::TangentVector::dimension }; enum { blocksize1 = TargetSpace1::TangentVector::dimension }; //! Dimension of the embedding space enum { embeddedBlocksize0 = TargetSpace0::EmbeddedTangentVector::dimension }; enum { embeddedBlocksize1 = TargetSpace1::EmbeddedTangentVector::dimension }; MixedLocalGFEADOLCStiffness(const MixedLocalGeodesicFEStiffness<Basis0, ATargetSpace0, Basis1, ATargetSpace1>* energy) : localEnergy_(energy) {} /** \brief Compute the energy at the current configuration */ virtual RT energy (const Entity& e, const LocalFiniteElement0& localFiniteElement0, const std::vector<TargetSpace0>& localConfiguration0, const LocalFiniteElement1& localFiniteElement1, const std::vector<TargetSpace1>& localConfiguration1) const; #if 0 /** \brief Assemble the element gradient of the energy functional This uses the automatic differentiation toolbox ADOL_C. */ virtual void assembleGradient(const Entity& element, const LocalFiniteElement& localFiniteElement, const std::vector<TargetSpace>& solution, std::vector<typename TargetSpace::TangentVector>& gradient) const; #endif /** \brief Assemble the local stiffness matrix at the current position This uses the automatic differentiation toolbox ADOL_C. */ virtual void assembleGradientAndHessian(const Entity& e, const LocalFiniteElement0& localFiniteElement0, const std::vector<TargetSpace0>& localConfiguration0, const LocalFiniteElement1& localFiniteElement1, const std::vector<TargetSpace1>& localConfiguration1, std::vector<typename TargetSpace0::TangentVector>& localGradient0, std::vector<typename TargetSpace1::TangentVector>& localGradient1); const MixedLocalGeodesicFEStiffness<Basis0, ATargetSpace0, Basis1, ATargetSpace1>* localEnergy_; }; template <class Basis0, class TargetSpace0, class Basis1, class TargetSpace1> typename MixedLocalGFEADOLCStiffness<Basis0, TargetSpace0, Basis1, TargetSpace1>::RT MixedLocalGFEADOLCStiffness<Basis0, TargetSpace0, Basis1, TargetSpace1>:: energy(const Entity& element, const LocalFiniteElement0& localFiniteElement0, const std::vector<TargetSpace0>& localConfiguration0, const LocalFiniteElement1& localFiniteElement1, const std::vector<TargetSpace1>& localConfiguration1) const { double pureEnergy; std::vector<ATargetSpace0> localAConfiguration0(localConfiguration0.size()); std::vector<ATargetSpace1> localAConfiguration1(localConfiguration1.size()); trace_on(1); adouble energy = 0; // The following loop is not quite intuitive: we copy the localSolution into an // array of FieldVector<double>, go from there to FieldVector<adouble> and // only then to ATargetSpace. // Rationale: The constructor/assignment-from-vector of TargetSpace frequently // contains a projection onto the manifold from the surrounding Euclidean space. // ADOL-C needs a function on the whole Euclidean space, hence that projection // is part of the function and needs to be taped. // The following variable cannot be declared inside of the loop, or ADOL-C will report wrong results // (Presumably because several independent variables use the same memory location.) std::vector<typename ATargetSpace0::CoordinateType> aRaw0(localConfiguration0.size()); for (size_t i=0; i<localConfiguration0.size(); i++) { typename TargetSpace0::CoordinateType raw = localConfiguration0[i].globalCoordinates(); for (size_t j=0; j<raw.size(); j++) aRaw0[i][j] <<= raw[j]; localAConfiguration0[i] = aRaw0[i]; // may contain a projection onto M -- needs to be done in adouble } std::vector<typename ATargetSpace1::CoordinateType> aRaw1(localConfiguration1.size()); for (size_t i=0; i<localConfiguration1.size(); i++) { typename TargetSpace1::CoordinateType raw = localConfiguration1[i].globalCoordinates(); for (size_t j=0; j<raw.size(); j++) aRaw1[i][j] <<= raw[j]; localAConfiguration1[i] = aRaw1[i]; // may contain a projection onto M -- needs to be done in adouble } energy = localEnergy_->energy(element, localFiniteElement0,localAConfiguration0, localFiniteElement1,localAConfiguration1); energy >>= pureEnergy; trace_off(); #if 0 size_t tape_stats[STAT_SIZE]; tapestats(1,tape_stats); // reading of tape statistics cout<<"maxlive "<<tape_stats[NUM_MAX_LIVES]<<"\n"; cout<<"tay_stack_size "<<tape_stats[TAY_STACK_SIZE]<<"\n"; cout<<"total number of operations "<<tape_stats[NUM_OPERATIONS]<<"\n"; // ..... print other tape stats #endif return pureEnergy; } #if 0 template <class GridView, class LocalFiniteElement, class TargetSpace> void LocalGeodesicFEADOLCStiffness<GridView, LocalFiniteElement, TargetSpace>:: assembleGradient(const Entity& element, const LocalFiniteElement& localFiniteElement, const std::vector<TargetSpace>& localSolution, std::vector<typename TargetSpace::TangentVector>& localGradient) const { // Tape energy computation. We may not have to do this every time, but it's comparatively cheap. energy(element, localFiniteElement, localSolution); // Compute the actual gradient size_t nDofs = localSolution.size(); size_t nDoubles = nDofs*embeddedBlocksize; std::vector<double> xp(nDoubles); int idx=0; for (size_t i=0; i<nDofs; i++) for (size_t j=0; j<embeddedBlocksize; j++) xp[idx++] = localSolution[i].globalCoordinates()[j]; // Compute gradient std::vector<double> g(nDoubles); gradient(1,nDoubles,xp.data(),g.data()); // gradient evaluation // Copy into Dune type std::vector<typename TargetSpace::EmbeddedTangentVector> localEmbeddedGradient(localSolution.size()); idx=0; for (size_t i=0; i<nDofs; i++) for (size_t j=0; j<embeddedBlocksize; j++) localEmbeddedGradient[i][j] = g[idx++]; // std::cout << "localEmbeddedGradient:\n"; // for (size_t i=0; i<nDofs; i++) // std::cout << localEmbeddedGradient[i] << std::endl; // Express gradient in local coordinate system for (size_t i=0; i<nDofs; i++) { Dune::FieldMatrix<RT,blocksize,embeddedBlocksize> orthonormalFrame = localSolution[i].orthonormalFrame(); orthonormalFrame.mv(localEmbeddedGradient[i],localGradient[i]); } } #endif // /////////////////////////////////////////////////////////// // Compute gradient and Hessian together // To compute the Hessian we need to compute the gradient anyway, so we may // as well return it. This saves assembly time. // /////////////////////////////////////////////////////////// template <class Basis0, class TargetSpace0, class Basis1, class TargetSpace1> void MixedLocalGFEADOLCStiffness<Basis0, TargetSpace0, Basis1, TargetSpace1>:: assembleGradientAndHessian(const Entity& element, const LocalFiniteElement0& localFiniteElement0, const std::vector<TargetSpace0>& localConfiguration0, const LocalFiniteElement1& localFiniteElement1, const std::vector<TargetSpace1>& localConfiguration1, std::vector<typename TargetSpace0::TangentVector>& localGradient0, std::vector<typename TargetSpace1::TangentVector>& localGradient1) { // Tape energy computation. We may not have to do this every time, but it's comparatively cheap. energy(element, localFiniteElement0, localConfiguration0, localFiniteElement1, localConfiguration1); ///////////////////////////////////////////////////////////////// // Compute the gradient. It is needed to transform the Hessian // into the correct coordinates. ///////////////////////////////////////////////////////////////// // Compute the actual gradient size_t nDofs0 = localConfiguration0.size(); size_t nDofs1 = localConfiguration1.size(); size_t nDoubles = nDofs0*embeddedBlocksize0 + nDofs1*embeddedBlocksize1; std::vector<double> xp(nDoubles); int idx=0; for (size_t i=0; i<localConfiguration0.size(); i++) for (size_t j=0; j<embeddedBlocksize0; j++) xp[idx++] = localConfiguration0[i].globalCoordinates()[j]; for (size_t i=0; i<localConfiguration1.size(); i++) for (size_t j=0; j<embeddedBlocksize1; j++) xp[idx++] = localConfiguration1[i].globalCoordinates()[j]; // Compute gradient std::vector<double> g(nDoubles); gradient(1,nDoubles,xp.data(),g.data()); // gradient evaluation // Copy into Dune type std::vector<typename TargetSpace0::EmbeddedTangentVector> localEmbeddedGradient0(localConfiguration0.size()); std::vector<typename TargetSpace1::EmbeddedTangentVector> localEmbeddedGradient1(localConfiguration1.size()); idx=0; for (size_t i=0; i<localConfiguration0.size(); i++) { for (size_t j=0; j<embeddedBlocksize0; j++) localEmbeddedGradient0[i][j] = g[idx++]; // Express gradient in local coordinate system localConfiguration0[i].orthonormalFrame().mv(localEmbeddedGradient0[i],localGradient0[i]); } for (size_t i=0; i<localConfiguration1.size(); i++) { for (size_t j=0; j<embeddedBlocksize1; j++) localEmbeddedGradient1[i][j] = g[idx++]; // Express gradient in local coordinate system localConfiguration1[i].orthonormalFrame().mv(localEmbeddedGradient1[i],localGradient1[i]); } ///////////////////////////////////////////////////////////////// // Compute Hessian ///////////////////////////////////////////////////////////////// // We compute the Hessian of the energy functional using the ADOL-C system. // Since ADOL-C does not know about nonlinear spaces, what we get is actually // the Hessian of a prolongation of the energy functional into the surrounding // Euclidean space. To obtain the Riemannian Hessian from this we apply the // formula described in Absil, Mahoney, Trumpf, "An extrinsic look at the Riemannian Hessian". // This formula consists of two steps: // 1) Remove all entries of the Hessian pertaining to the normal space of the // manifold. In the aforementioned paper this is done by projection onto the // tangent space. Since we want a matrix that is really smaller (but full rank again), // we can achieve the same effect by multiplying the embedded Hessian from the left // and from the right by the matrix of orthonormal frames. // 2) Add a correction involving the Weingarten map. // // This works, and is easy to implement using the ADOL-C "hessian" driver. // However, here we implement a small shortcut. Computing the embedded Hessian and // multiplying one side by the orthonormal frame is the same as evaluating the Hessian // (seen as an operator from R^n to R^n) in the directions of the vectors of the // orthonormal frame. By luck, ADOL-C can compute the evaluations of the Hessian in // a given direction directly (in fact, this is also how the "hessian" driver works). // Since there are less frame vectors than the dimension of the embedding space, // this reinterpretation allows to reduce the number of calls to ADOL-C. // In my Cosserat shell tests this reduced assembly time by about 10%. std::vector<Dune::FieldMatrix<RT,blocksize0,embeddedBlocksize0> > orthonormalFrame0(nDofs0); for (size_t i=0; i<nDofs0; i++) orthonormalFrame0[i] = localConfiguration0[i].orthonormalFrame(); std::vector<Dune::FieldMatrix<RT,blocksize1,embeddedBlocksize1> > orthonormalFrame1(nDofs1); for (size_t i=0; i<nDofs1; i++) orthonormalFrame1[i] = localConfiguration1[i].orthonormalFrame(); Dune::Matrix<Dune::FieldMatrix<double,blocksize0, embeddedBlocksize0> > embeddedHessian00(nDofs0,nDofs0); Dune::Matrix<Dune::FieldMatrix<double,blocksize0, embeddedBlocksize1> > embeddedHessian01(nDofs0,nDofs1); Dune::Matrix<Dune::FieldMatrix<double,blocksize1, embeddedBlocksize0> > embeddedHessian10(nDofs1,nDofs0); Dune::Matrix<Dune::FieldMatrix<double,blocksize1, embeddedBlocksize1> > embeddedHessian11(nDofs1,nDofs1); #ifndef ADOLC_VECTOR_MODE #error ADOL-C scalar mode not implemented #if 0 std::vector<double> v(nDoubles); std::vector<double> w(nDoubles); std::fill(v.begin(), v.end(), 0.0); for (int i=0; i<nDofs; i++) for (int ii=0; ii<blocksize; ii++) { // Evaluate Hessian in the direction of each vector of the orthonormal frame for (size_t k=0; k<embeddedBlocksize; k++) v[i*embeddedBlocksize + k] = orthonormalFrame[i][ii][k]; int rc= 3; MINDEC(rc, hess_vec(1, nDoubles, xp.data(), v.data(), w.data())); if (rc < 0) DUNE_THROW(Dune::Exception, "ADOL-C has returned with error code " << rc << "!"); for (int j=0; j<nDoubles; j++) embeddedHessian[i][j/embeddedBlocksize][ii][j%embeddedBlocksize] = w[j]; // Make v the null vector again std::fill(&v[i*embeddedBlocksize], &v[(i+1)*embeddedBlocksize], 0.0); } #endif #else int n = nDoubles; int nDirections = nDofs0 * blocksize0 + nDofs1 * blocksize1; double* tangent[nDoubles]; for(size_t i=0; i<nDoubles; i++) tangent[i] = (double*)malloc(nDirections*sizeof(double)); double* rawHessian[nDoubles]; for(size_t i=0; i<nDoubles; i++) rawHessian[i] = (double*)malloc(nDirections*sizeof(double)); // Initialize directions field with zeros for (int j=0; j<nDirections; j++) for (int i=0; i<n; i++) tangent[i][j] = 0.0; for (int j=0; j<nDofs0*blocksize0; j++) for (int i=0; i<embeddedBlocksize0; i++) tangent[(j/blocksize0)*embeddedBlocksize0+i][j] = orthonormalFrame0[j/blocksize0][j%blocksize0][i]; for (int j=0; j<nDofs1*blocksize1; j++) for (int i=0; i<embeddedBlocksize1; i++) tangent[nDofs0*embeddedBlocksize0 + (j/blocksize1)*embeddedBlocksize1+i][nDofs0*blocksize0 + j] = orthonormalFrame1[j/blocksize1][j%blocksize1][i]; hess_mat(1,nDoubles,nDirections,xp.data(),tangent,rawHessian); // Copy Hessian into Dune data type size_t offset0 = nDofs0*embeddedBlocksize0; size_t offset1 = nDofs0*blocksize0; // upper left block for(size_t i=0; i<nDofs0*embeddedBlocksize0; i++) for (size_t j=0; j<nDofs0*blocksize0; j++) embeddedHessian00[j/blocksize0][i/embeddedBlocksize0][j%blocksize0][i%embeddedBlocksize0] = rawHessian[i][j]; // upper right block for(size_t i=0; i<nDofs1*embeddedBlocksize1; i++) for (size_t j=0; j<nDofs0*blocksize0; j++) embeddedHessian01[j/blocksize0][i/embeddedBlocksize1][j%blocksize0][i%embeddedBlocksize1] = rawHessian[offset0+i][j]; // lower left block for(size_t i=0; i<nDofs0*embeddedBlocksize0; i++) for (size_t j=0; j<nDofs1*blocksize1; j++) embeddedHessian10[j/blocksize1][i/embeddedBlocksize0][j%blocksize1][i%embeddedBlocksize0] = rawHessian[i][offset1+j]; // lower right block for(size_t i=0; i<nDofs1*embeddedBlocksize1; i++) for (size_t j=0; j<nDofs1*blocksize1; j++) embeddedHessian11[j/blocksize1][i/embeddedBlocksize1][j%blocksize1][i%embeddedBlocksize1] = rawHessian[offset0+i][offset1+j]; for(size_t i=0; i<nDoubles; i++) { free(rawHessian[i]); free(tangent[i]); } #endif // From this, compute the Hessian with respect to the manifold (which we assume here is embedded // isometrically in a Euclidean space. // For the detailed explanation of the following see: Absil, Mahoney, Trumpf, "An extrinsic look // at the Riemannian Hessian". this->A00_.setSize(nDofs0,nDofs0); for (size_t col=0; col<nDofs0; col++) { for (size_t subCol=0; subCol<blocksize0; subCol++) { typename TargetSpace0::EmbeddedTangentVector z = orthonormalFrame0[col][subCol]; // P_x \partial^2 f z for (size_t row=0; row<nDofs0; row++) { typename TargetSpace0::TangentVector semiEmbeddedProduct; embeddedHessian00[row][col].mv(z,semiEmbeddedProduct); for (int subRow=0; subRow<blocksize0; subRow++) this->A00_[row][col][subRow][subCol] = semiEmbeddedProduct[subRow]; } } } this->A01_.setSize(nDofs0,nDofs1); for (size_t col=0; col<nDofs1; col++) { for (size_t subCol=0; subCol<blocksize1; subCol++) { typename TargetSpace1::EmbeddedTangentVector z = orthonormalFrame1[col][subCol]; // P_x \partial^2 f z for (size_t row=0; row<nDofs0; row++) { typename TargetSpace0::TangentVector semiEmbeddedProduct; embeddedHessian01[row][col].mv(z,semiEmbeddedProduct); for (int subRow=0; subRow<blocksize0; subRow++) this->A01_[row][col][subRow][subCol] = semiEmbeddedProduct[subRow]; } } } this->A10_.setSize(nDofs1,nDofs0); for (size_t col=0; col<nDofs0; col++) { for (size_t subCol=0; subCol<blocksize0; subCol++) { typename TargetSpace0::EmbeddedTangentVector z = orthonormalFrame0[col][subCol]; // P_x \partial^2 f z for (size_t row=0; row<nDofs1; row++) { typename TargetSpace1::TangentVector semiEmbeddedProduct; embeddedHessian10[row][col].mv(z,semiEmbeddedProduct); for (int subRow=0; subRow<blocksize1; subRow++) this->A10_[row][col][subRow][subCol] = semiEmbeddedProduct[subRow]; } } } this->A11_.setSize(nDofs1,nDofs1); for (size_t col=0; col<nDofs1; col++) { for (size_t subCol=0; subCol<blocksize1; subCol++) { typename TargetSpace1::EmbeddedTangentVector z = orthonormalFrame1[col][subCol]; // P_x \partial^2 f z for (size_t row=0; row<nDofs1; row++) { typename TargetSpace1::TangentVector semiEmbeddedProduct; embeddedHessian11[row][col].mv(z,semiEmbeddedProduct); for (int subRow=0; subRow<blocksize1; subRow++) this->A11_[row][col][subRow][subCol] = semiEmbeddedProduct[subRow]; } } } ////////////////////////////////////////////////////////////////////////////////////// // Further correction due to non-planar configuration space // + \mathfrak{A}_x(z,P^\orth_x \partial f) ////////////////////////////////////////////////////////////////////////////////////// // Project embedded gradient onto normal space std::vector<typename TargetSpace0::EmbeddedTangentVector> projectedGradient0(nDofs0); for (size_t i=0; i<nDofs0; i++) projectedGradient0[i] = localConfiguration0[i].projectOntoNormalSpace(localEmbeddedGradient0[i]); std::vector<typename TargetSpace1::EmbeddedTangentVector> projectedGradient1(nDofs1); for (size_t i=0; i<nDofs1; i++) projectedGradient1[i] = localConfiguration1[i].projectOntoNormalSpace(localEmbeddedGradient1[i]); // The Weingarten map has only diagonal entries for (size_t row=0; row<nDofs0; row++) { for (size_t subRow=0; subRow<blocksize0; subRow++) { typename TargetSpace0::EmbeddedTangentVector z = orthonormalFrame0[row][subRow]; typename TargetSpace0::EmbeddedTangentVector tmp1 = localConfiguration0[row].weingarten(z,projectedGradient0[row]); typename TargetSpace0::TangentVector tmp2; orthonormalFrame0[row].mv(tmp1,tmp2); this->A00_[row][row][subRow] += tmp2; } } for (size_t row=0; row<nDofs1; row++) { for (size_t subRow=0; subRow<blocksize1; subRow++) { typename TargetSpace1::EmbeddedTangentVector z = orthonormalFrame1[row][subRow]; typename TargetSpace1::EmbeddedTangentVector tmp1 = localConfiguration1[row].weingarten(z,projectedGradient1[row]); typename TargetSpace1::TangentVector tmp2; orthonormalFrame1[row].mv(tmp1,tmp2); this->A11_[row][row][subRow] += tmp2; } } // std::cout << "ADOL-C stiffness:\n"; // printmatrix(std::cout, this->A_, "foo", "--"); } #endif