diff --git a/dune/gfe/CMakeLists.txt b/dune/gfe/CMakeLists.txt
index ae6ae22e4d71f2a5b55082c5c9466c263d4d5c4e..5af12f1e165dabd8ec1a7daa25bc83eada378afd 100644
--- a/dune/gfe/CMakeLists.txt
+++ b/dune/gfe/CMakeLists.txt
@@ -11,6 +11,7 @@ install(FILES
         gfedifferencenormsquared.hh
         globalgfefunction.hh
         gramschmidtsolver.hh
+        interpolationderivatives.hh
         linearalgebra.hh
         localgeodesicfefunction.hh
         localgfetestfunctionbasis.hh
diff --git a/dune/gfe/assemblers/CMakeLists.txt b/dune/gfe/assemblers/CMakeLists.txt
index 279a37476b2fef992cef984465c698e82edf2374..7ebb988f4f42a1cbbf3b74b920c4d1d514b5222a 100644
--- a/dune/gfe/assemblers/CMakeLists.txt
+++ b/dune/gfe/assemblers/CMakeLists.txt
@@ -11,6 +11,7 @@ install(FILES
         localgeodesicfefdstiffness.hh
         localgeodesicfestiffness.hh
         localintegralenergy.hh
+        localintegralstiffness.hh
         mixedgfeassembler.hh
         nonplanarcosseratshellenergy.hh
         simofoxenergy.hh
diff --git a/dune/gfe/assemblers/localintegralstiffness.hh b/dune/gfe/assemblers/localintegralstiffness.hh
new file mode 100644
index 0000000000000000000000000000000000000000..ceee07f882cef76701fc9df11d1d25e3016660d3
--- /dev/null
+++ b/dune/gfe/assemblers/localintegralstiffness.hh
@@ -0,0 +1,896 @@
+#ifndef DUNE_GFE_ASSEMBLERS_LOCALINTEGRALSTIFFNESS_HH
+#define DUNE_GFE_ASSEMBLERS_LOCALINTEGRALSTIFFNESS_HH
+
+#include <adolc/adolc.h>
+
+#include <dune/fufem/utilities/adolcnamespaceinjections.hh>
+
+#include <dune/common/fmatrix.hh>
+#include <dune/common/tuplevector.hh>
+
+#include <dune/istl/matrix.hh>
+
+#include <dune/geometry/quadraturerules.hh>
+
+#include <dune/gfe/assemblers/localgeodesicfestiffness.hh>
+#include <dune/gfe/assemblers/localintegralenergy.hh>
+#include <dune/gfe/densities/localdensity.hh>
+#include <dune/gfe/interpolationderivatives.hh>
+
+
+namespace Dune::GFE
+{
+  /** \brief This class assembles the energy gradient and Hessian for an integral over a given density function.
+   *
+   * Using the chain rule, gradient and Hesse matrix are split into derivatives of the
+   * density and derivatives of the geometric FE interpolation functions.
+   *
+   * These derivatives are computed by dedicated classes, and LocalIntegralStiffness
+   * combines the results.
+   *
+   * \tparam Basis The scalar finite element basis used to construct the interpolation rule
+   * \tparam LocalInterpolationRule The rule that turns coefficients into functions
+   * \tparam TargetSpace The space that the geometric finite element function maps into
+   */
+  template<class Basis, class LocalInterpolationRule, class TargetSpace>
+  class LocalIntegralStiffness
+    : public LocalGeodesicFEStiffness<Basis,TargetSpace>
+  {
+  public:
+    constexpr static int gridDim = Basis::GridView::dimension;
+
+    //! Dimension of the tangent spaces
+    constexpr static int blocksize = TargetSpace::TangentVector::dimension;
+
+    //! Dimension of the embedding spaces
+    constexpr static int embeddedBlocksize = TargetSpace::EmbeddedTangentVector::dimension;
+
+    //! Number of the independent variables for the function evaluation
+    // 1 is for the function evaluation, gridDim is for the evaluation of the derivative
+    constexpr static int m = (1 + gridDim) * embeddedBlocksize;
+
+    using GridView                    = typename Basis::GridView;
+    using DT                          = typename GridView::ctype;
+    using RT                          = typename TargetSpace::ctype;
+    using ATargetSpace                = typename TargetSpace::template rebind<adouble>::other;
+    using TargetSpaceCoordinate      = typename TargetSpace::CoordinateType;
+    // TODO: Take this from the interpolation rule
+    using TargetSpaceDerivativeType  = FieldMatrix<double, embeddedBlocksize, gridDim>;
+
+    using LocalCoordinate = typename GridView::template Codim<0>::Geometry::LocalCoordinate;
+
+    LocalIntegralStiffness(const std::shared_ptr<GFE::LocalDensity<LocalCoordinate,ATargetSpace> >& ld)
+      : localDensity_(ld)
+    {
+      densityTangent_ = myalloc3(m,m,1);
+
+      // Initialize directions field
+      for (int j=0; j<m; j++)
+        for (int i=0; i<m; i++)
+          densityTangent_[i][j][0] = i == j ? 1.0 : 0.0;
+    }
+
+    virtual ~LocalIntegralStiffness()
+    {
+      myfree3(densityTangent_);
+    }
+
+    virtual RT
+    energy(const typename Basis::LocalView& localView,
+           const typename Impl::LocalEnergyTypes<TargetSpace>::Coefficients& coefficients) const override
+    {
+      DUNE_THROW(NotImplemented,"Energy method not implemented!");
+      return 0;
+    }
+
+    /** \brief ProductManifolds: Compute the energy from coefficients in separate containers
+     * for each factor
+     */
+    virtual RT
+    energy(const typename Basis::LocalView& localView,
+           const typename Impl::LocalEnergyTypes<TargetSpace>::CompositeCoefficients& coefficients) const override
+    {
+      DUNE_THROW(NotImplemented,"Energy method not implemented!");
+      return 0;
+    }
+
+    /** \brief Assemble the element gradient of the energy functional */
+    virtual void assembleGradient(const typename Basis::LocalView& localView,
+                                  const typename Impl::LocalEnergyTypes<TargetSpace>::Coefficients& coefficients,
+                                  std::vector<double>& gradient) const override
+    {
+      DUNE_THROW(NotImplemented,"assembleGradient method not implemented!");
+    }
+
+    /** \brief Assemble the local gradient and stiffness matrix at the current position
+
+     */
+    virtual void assembleGradientAndHessian(const typename Basis::LocalView& localView,
+                                            const typename GFE::Impl::LocalStiffnessTypes<TargetSpace>::Coefficients& localCoefficients,
+                                            std::vector<double>& localGradient,
+                                            typename GFE::Impl::LocalStiffnessTypes<TargetSpace>::Hessian& localHessian) const override
+    {
+      LocalInterpolationRule localGFEFunction(localView.tree().finiteElement(),localCoefficients);
+
+      InterpolationDerivatives<LocalInterpolationRule> interpolationDerivatives(localGFEFunction,
+                                                                                localDensity_->dependsOnValue(),
+                                                                                localDensity_->dependsOnDerivative());
+
+      const size_t nDofs = localCoefficients.size();
+      const size_t n = nDofs * embeddedBlocksize;
+
+      // Precompute the orthonormal frames
+      std::vector<FieldMatrix<double,blocksize,embeddedBlocksize> > orthonormalFrames;
+
+      orthonormalFrames.resize(localCoefficients.size());
+      for (size_t i=0; i<localCoefficients.size(); ++i)
+        orthonormalFrames[i] = localCoefficients[i].orthonormalFrame();
+
+      localGradient.resize(nDofs*blocksize);
+      std::fill(localGradient.begin(), localGradient.end(), 0.0);
+
+      std::vector<typename TargetSpace::EmbeddedTangentVector> localEmbeddedGradient(nDofs);
+      std::fill(localEmbeddedGradient.begin(), localEmbeddedGradient.end(), 0.0);
+
+      localHessian.setSize(nDofs*blocksize, nDofs*blocksize);
+      localHessian = 0.0;
+
+      const auto& localFiniteElement = localView.tree().finiteElement();
+
+      const auto& element = localView.element();
+
+      int quadOrder = (localFiniteElement.type().isSimplex())
+           ? (localFiniteElement.localBasis().order()-1) * 2
+           : (localFiniteElement.localBasis().order() * gridDim - 1) * 2;
+
+      const auto& quad = QuadratureRules<double, gridDim>::rule(element.type(), quadOrder);
+
+      Matrix<double> interpolationGradient(m,n);
+      Matrix<double> interpolationGradientShort(m,nDofs*blocksize);
+
+      Matrix<FieldMatrix<double,blocksize,blocksize> > interpolationHessian(nDofs,nDofs);
+
+      Matrix<double> hessianDensity(m,m);
+
+      for (const auto& qp : quad)
+      {
+        typename TargetSpace::CoordinateType interpolationValueGlobalCoordinates;
+        TargetSpaceDerivativeType interpolationDerivative;
+
+        // We use even numbers for the FE interpolation, and odd numbers for the integral density.
+        // TODO: This comment is wrong in the other specialization.
+        int densityTapeNumber       = 2*MPIHelper::getCommunication().rank();
+        int interpolationTapeNumber = 2*MPIHelper::getCommunication().rank()+1;
+
+        // Evaluate the FE-function and its derivative with respect to the evaluation point
+        // at the current quadrature point
+        interpolationDerivatives.bind(interpolationTapeNumber,
+                                      localView.element(),
+                                      qp.position(),
+                                      interpolationValueGlobalCoordinates,
+                                      interpolationDerivative);
+
+        // Evaluate the density function and its derivatives at the current quadrature point
+        std::vector<double> densityGradient(m);
+        evaluateDensity(densityTapeNumber,
+                        localView,
+                        qp,
+                        interpolationValueGlobalCoordinates,
+                        interpolationDerivative,
+                        densityGradient,
+                        hessianDensity);
+
+        // Multiply the gradient and Hesse matrix of the density by the quadrature weight
+        // and the integration element.  This is the cheapest place to put this multiplication.
+        const auto integrationElement = element.geometry().integrationElement(qp.position());
+
+        for (auto& g : densityGradient)
+          g *= qp.weight() * integrationElement;
+        hessianDensity *= qp.weight() * integrationElement;
+
+        // Compute the derivatives of the GFE interpolation function
+        interpolationDerivatives.evaluateDerivatives(interpolationTapeNumber,
+                                                     densityGradient.data(),
+                                                     interpolationGradient,
+                                                     interpolationGradientShort,
+                                                     interpolationHessian);
+
+        // Chain rule: Multiply the derivative of the density with the derivative of the evaluation to get the total gradient, embedded
+        // Store the Euclidean gradient for the conversion from the Euclidean to the Riemannian Hesse matrix.
+        for (size_t i = 0; i < nDofs; i++)
+          for (size_t ii=0; ii<embeddedBlocksize; ++ii)
+            for (size_t j = 0; j < m; j++)
+              localEmbeddedGradient[i][ii] += densityGradient[j] * interpolationGradient[j][i*embeddedBlocksize+ii];
+
+        for (size_t i = 0; i < nDofs*blocksize; i++)
+          for (size_t j = 0; j < m; j++)
+            localGradient[i] += densityGradient[j] * interpolationGradientShort[j][i];
+
+        ///////////////////////////////////////////////////////////////////////
+        //  Chain rule to construct Hessian
+        ///////////////////////////////////////////////////////////////////////
+
+        // The range of input variables that the density depends on
+        const size_t begin = (localDensity_->dependsOnValue()) ? 0 : TargetSpace::CoordinateType::dimension;
+        const size_t end = (localDensity_->dependsOnDerivative()) ? m : TargetSpace::CoordinateType::dimension;
+
+        // tmp = hessianDensity * interpolationGradientShort
+        Matrix<double> tmp(m,nDofs*blocksize);
+        tmp = 0;
+
+        for (size_t k = begin; k < end; k++)
+          for (size_t j = 0; j < nDofs*blocksize; j++)
+            for (size_t kk = begin; kk < end; kk++)
+              tmp[k][j] += hessianDensity[k][kk] * interpolationGradientShort[kk][j];
+
+        // localHessian += interpolationGradientShort^T * tmp
+        // (lower left triangle only)
+        for (size_t k = begin; k < end; k++)
+          for (size_t i = 0; i < nDofs*blocksize; i++)
+            for (size_t j = 0; j <= i; j++)
+              localHessian[i][j] += interpolationGradientShort[k][i] * tmp[k][j];
+
+        // Add the contribution from the second derivative of the interpolation
+        // (lower left triangle only)
+        for (size_t j = 0; j < nDofs; j++)
+          for (size_t jj = 0; jj <=j; jj++)
+            for (size_t i = 0; i < blocksize; i++)
+              for (size_t ii = 0; ii < blocksize; ii++)
+                localHessian[j*blocksize+i][jj*blocksize+ii] += interpolationHessian[j][jj][i][ii];
+
+      } // Loop over the current quadrature point
+
+      // Copy the lower-left triangle to the upper-right triangle
+      for (size_t i = 0; i < nDofs*blocksize; i++)
+        for (size_t j = 0; j <= i; j++)
+          localHessian[j][i] = localHessian[i][j];
+
+      //////////////////////////////////////////////////////////////////////////////////////
+      //  From this, compute the Hessian with respect to the manifold
+      //  (which we assume here is embedded isometrically in a Euclidean space.
+      //  We add
+      //
+      //       \mathfrak{A}_x(z,P^\orth_x \partial f)
+      //
+      //  For the detailed explanation of the following see:
+      //
+      //     Absil, Mahoney, Trumpf, "An extrinsic look at the Riemannian Hessian".
+      //////////////////////////////////////////////////////////////////////////////////////
+
+      // Project embedded gradient onto normal space
+      std::vector<typename TargetSpace::EmbeddedTangentVector> projectedGradient(nDofs);
+      for (size_t i=0; i<nDofs; i++)
+        projectedGradient[i] = localCoefficients[i].projectOntoNormalSpace(localEmbeddedGradient[i]);
+
+      // The Weingarten map has only diagonal entries
+      for (size_t row=0; row<nDofs; row++)
+      {
+        for (size_t subRow=0; subRow<blocksize; subRow++)
+        {
+          typename TargetSpace::EmbeddedTangentVector z = orthonormalFrames[row][subRow];
+          auto tmp1 = localCoefficients[row].weingarten(z,projectedGradient[row]);
+
+          typename TargetSpace::TangentVector tmp2;
+          orthonormalFrames[row].mv(tmp1,tmp2);
+
+          for (size_t subCol=0; subCol<blocksize; subCol++)
+            localHessian[row*blocksize+subRow][row*blocksize+subCol] += tmp2[subCol];
+        }
+      }
+    }
+
+    /** \brief Assemble the local gradient and stiffness matrix at the current position -- Composite version
+     */
+    virtual void assembleGradientAndHessian(const typename Basis::LocalView& localView,
+                                            const typename GFE::Impl::LocalStiffnessTypes<TargetSpace>::CompositeCoefficients& localCoefficients,
+                                            std::vector<double>& localGradient,
+                                            typename GFE::Impl::LocalStiffnessTypes<TargetSpace>::CompositeHessian& localHessian) const override
+    {
+      DUNE_THROW(NotImplemented, "Method not implemented!");
+    }
+
+  private:
+
+    /** \brief Trace the call to the density function, evaluated at the current quadrature point
+     *         and the current function value and derivative
+     *  \param[in] localView                         The local view
+     *  \param[in] qp                                The quadrature point
+     *  \param[in] interpolationValueGlobalCoordinates The current function value
+     *  \param[in] interpolationDerivative             The current derivative
+     *  \param[out] derivativeDensity                The first derivative of the density function
+     *  \param[out] hessianDensity                   The second derivative of the density function
+     */
+    void evaluateDensity(short tapeNumber,
+                         const typename Basis::LocalView& localView,
+                         const QuadraturePoint<double,gridDim>& qp,
+                         const TargetSpaceCoordinate& interpolationValueGlobalCoordinates,
+                         const TargetSpaceDerivativeType& interpolationDerivative,
+                         std::vector<double>& derivativeDensity,
+                         Matrix<double>& hessianDensity) const
+    {
+      auto x = localView.element().geometry().global(qp.position());
+      trace_on(tapeNumber);
+
+      int idx=0;
+      // Construct vector containing the two dependent variables, this is where we evaluate the tape
+      // * interpolationValueGlobalCoordinates
+      // * interpolationDerivative
+      std::vector<double> xp(m);
+
+      typename ATargetSpace::CoordinateType aInterpolationValueGlobalCoordinates;
+
+      using ATargetSpaceDerivativeType = FieldMatrix<adouble, embeddedBlocksize, gridDim>;
+      ATargetSpaceDerivativeType aInterpolationDerivative;
+
+      for (size_t i = 0; i<embeddedBlocksize; i++)
+      {
+        aInterpolationValueGlobalCoordinates[i] <<= interpolationValueGlobalCoordinates[i];
+        xp[idx++] = interpolationValueGlobalCoordinates[i];
+      }
+
+      for (size_t i = 0; i<embeddedBlocksize; i++)
+        for (size_t j = 0; j<gridDim; j++) {
+          aInterpolationDerivative[i][j] <<= interpolationDerivative[i][j];
+          xp[idx++] = interpolationDerivative[i][j];
+        }
+
+      adouble density = (*localDensity_)(x, aInterpolationValueGlobalCoordinates, aInterpolationDerivative);
+
+      double pureDensity;
+      density >>= pureDensity;
+
+      trace_off();
+
+      const auto n = m;
+      const auto q = m;
+
+      double*** Yppp = myalloc3(1,q,1);   /* results of hov_wk_forward  */
+      double*** Zppp = myalloc3(q,n,2);   /* result of hos_ov_reverse */
+
+      double Upp[2] = {1.0, 0.0};
+      double* UppPtr[2] = {Upp, Upp+1};
+
+      // Compute first derivatives in forward mode
+      double value;   // The density value, as computed by hov_wk_forward
+      hov_wk_forward(tapeNumber, 1, n, 1, 2, q, xp.data(), densityTangent_, &value, Yppp);
+
+      for (int i=0; i<m; i++)
+        derivativeDensity[i] = Yppp[0][i][0];
+
+      // Compute second derivatives in reverse mode
+      hos_ov_reverse(tapeNumber, 1, n, 1, q, UppPtr, Zppp);
+
+      for (int i = 0; i < q; ++i)
+        for (int j = 0; j < n; ++j)
+          hessianDensity[j][i] = Zppp[i][j][1];
+
+      myfree3(Zppp);
+      myfree3(Yppp);
+    }
+
+    // The density that is being integrated over
+    const std::shared_ptr<GFE::LocalDensity<LocalCoordinate,ATargetSpace> > localDensity_ = nullptr;
+
+    // Dense matrix containing the directions that the derivatives of the density
+    // will be computed in.
+    double*** densityTangent_;
+  };
+
+
+  /** \brief Specialization of the LocalIntegralStiffness class for product manifolds
+   *
+   * \tparam FactorSpaces The factors of a ProductManifold type
+   */
+  template<class Basis, class LocalInterpolationRule, class ... FactorSpaces>
+  class LocalIntegralStiffness<Basis,LocalInterpolationRule,ProductManifold<FactorSpaces...> >
+    : public LocalGeodesicFEStiffness<Basis,ProductManifold<FactorSpaces...> >
+  {
+  public:
+
+    using TargetSpace = ProductManifold<FactorSpaces...>;
+
+    constexpr static int gridDim = Basis::GridView::dimension;
+
+    using TargetSpace0 = typename std::tuple_element<0,TargetSpace>::type;
+    using TargetSpace1 = typename std::tuple_element<1,TargetSpace>::type;
+
+    //! Dimension of the tangent spaces
+    constexpr static int blocksize0 = TargetSpace0::TangentVector::dimension;
+    constexpr static int blocksize1 = TargetSpace1::TangentVector::dimension;
+
+    //! Dimension of the embedding spaces
+    constexpr static int embeddedBlocksize0 = TargetSpace0::EmbeddedTangentVector::dimension;
+    constexpr static int embeddedBlocksize1 = TargetSpace1::EmbeddedTangentVector::dimension;
+
+    //! Number of the independent variables for the function evaluation
+    // 1 is for the function evaluation, gridDim is for the evaluation of the derivative
+    constexpr static int m0 = (1 + gridDim) * embeddedBlocksize0;
+    constexpr static int m1 = (1 + gridDim) * embeddedBlocksize1;
+    constexpr static int m = m0 + m1;
+
+    using GridView                    = typename Basis::GridView;
+    using DT                          = typename GridView::ctype;
+    using RT                          = typename TargetSpace0::ctype;
+    using ATargetSpace                = typename TargetSpace::template rebind<adouble>::other;
+    using ATargetSpace0               = typename TargetSpace0::template rebind<adouble>::other;
+    using ATargetSpace1               = typename TargetSpace1::template rebind<adouble>::other;
+    using TargetSpace0Coordinate      = typename TargetSpace0::CoordinateType;
+    using TargetSpace1Coordinate      = typename TargetSpace1::CoordinateType;
+    using ATargetSpaceDerivativeType   = FieldMatrix<adouble, embeddedBlocksize0+embeddedBlocksize1, gridDim>;
+    using TargetSpace0DerivativeType  = FieldMatrix<double, embeddedBlocksize0, gridDim>;
+    using TargetSpace1DerivativeType  = FieldMatrix<double, embeddedBlocksize1, gridDim>;
+
+    using LocalCoordinate = typename GridView::template Codim<0>::Geometry::LocalCoordinate;
+
+    LocalIntegralStiffness(const std::shared_ptr<GFE::LocalDensity<LocalCoordinate,ATargetSpace> >& ld)
+      : localDensity_(ld),
+      densityTangentData_(std::make_unique<double[]>(m*m))
+    {
+      for (size_t i=0; i<m; i++)
+        densityTangent_[i] = densityTangentData_.get() + i*m;
+
+      // Initialize directions field
+      for (int j=0; j<m; j++)
+        for (int i=0; i<m; i++)
+          densityTangent_[i][j] = i == j ? 1.0 : 0.0;
+    }
+
+
+    virtual ~LocalIntegralStiffness() {}
+
+    virtual RT
+    energy(const typename Basis::LocalView& localView,
+           const typename Impl::LocalEnergyTypes<TargetSpace>::Coefficients& coefficients) const override
+    {
+      DUNE_THROW(NotImplemented,"Energy method not implemented!");
+      return 0;
+    }
+
+    /** \brief ProductManifolds: Compute the energy from coefficients in separate containers
+     * for each factor
+     */
+    virtual RT
+    energy(const typename Basis::LocalView& localView,
+           const typename Impl::LocalEnergyTypes<TargetSpace>::CompositeCoefficients& coefficients) const override
+    {
+      DUNE_THROW(NotImplemented,"Energy method not implemented!");
+      return 0;
+    }
+
+    /** \brief Assemble the element gradient of the energy functional */
+    virtual void assembleGradient(const typename Basis::LocalView& localView,
+                                  const typename Impl::LocalEnergyTypes<TargetSpace>::Coefficients& coefficients,
+                                  std::vector<double>& gradient) const override
+    {
+      DUNE_THROW(NotImplemented,"assembleGradient method not implemented!");
+    }
+
+    /** \brief Assemble the local gradient and stiffness matrix at the current position
+
+     */
+    virtual void assembleGradientAndHessian(const typename Basis::LocalView& localView,
+                                            const typename Dune::GFE::Impl::LocalStiffnessTypes<TargetSpace>::Coefficients& coefficients,
+                                            std::vector<double>& localGradient,
+                                            typename Dune::GFE::Impl::LocalStiffnessTypes<TargetSpace>::Hessian& localHessian) const override
+    {
+      DUNE_THROW(NotImplemented, "Method not implemented!");
+    }
+
+    /** \brief Assemble the local gradient and stiffness matrix at the current position -- Composite version
+     */
+    virtual void assembleGradientAndHessian(const typename Basis::LocalView& localView,
+                                            const typename Dune::GFE::Impl::LocalStiffnessTypes<TargetSpace>::CompositeCoefficients& coefficients,
+                                            std::vector<double>& localGradient,
+                                            typename Dune::GFE::Impl::LocalStiffnessTypes<TargetSpace>::CompositeHessian& localHessian) const override;
+
+  protected:
+    const std::shared_ptr<GFE::LocalDensity<LocalCoordinate,ATargetSpace> > localDensity_ = nullptr;
+
+
+    // Dense matrix containing the directions that the derivatives of the density
+    // will be computed in.
+    std::unique_ptr<double[]> densityTangentData_;
+
+    // Array of pointers to the first entry of each row
+    double* densityTangent_[m];
+
+  private:
+
+    /** \brief Trace the call to the density function, evaluated at the current quadrature point
+     *         and the current function value and derivative
+     *  \param[in] localView                         The local view
+     *  \param[in] qp                                The quadrature point
+     *  \param[in] deformationValueGlobalCoordinates The current function value - part 0
+     *  \param[in] orientationValueGlobalCoordinates The current function value - part 1
+     *  \param[in] deformationDerivative             The current derivative - part 0
+     *  \param[in] orientationDerivative             The current derivative - part 1
+     *  \param[out] derivativeDensity                The first derivative of the density function
+     *  \param[out] hessianDensity                   The second derivative of the density function
+     */
+    void evaluateDensity(short tapeNumber,
+                         const typename Basis::LocalView& localView,
+                         const QuadraturePoint<double,gridDim>& qp,
+                         const TargetSpace0Coordinate& deformationValueGlobalCoordinates,
+                         const TargetSpace1Coordinate& orientationValueGlobalCoordinates,
+                         const TargetSpace0DerivativeType& deformationDerivative,
+                         const TargetSpace1DerivativeType& orientationDerivative,
+                         std::vector<double>& derivativeDensity,
+                         Matrix<double>& hessianDensity) const
+    {
+      using namespace Dune::Indices;
+      auto x = localView.element().geometry().global(qp.position());
+
+      trace_on(tapeNumber);
+
+      int idx=0;
+      // Construct vector containing the four dependent variables, this is where we evaluate the tape
+      // Dependent variables:
+      // * deformationValueGlobalCoordinates
+      // * deformationDerivative
+      // * orientationValueGlobalCoordinates
+      // * orientationDerivative
+      std::vector<double> xp(m);
+
+      // Attention: Keep the order of the 4 for-loops below as they are, this is the "order" of the dependent variables, the derivatives are ordered in the same way!
+      ATargetSpace aValue;
+      typename ATargetSpace0::CoordinateType aDeformationValueGlobalCoordinates;
+      typename ATargetSpace1::CoordinateType aOrientationValueGlobalCoordinates;
+
+      ATargetSpaceDerivativeType aDerivative;
+
+      for (size_t i = 0; i<embeddedBlocksize0; i++)
+      {
+        aDeformationValueGlobalCoordinates[i] <<= deformationValueGlobalCoordinates[i];
+        aValue[_0] = aDeformationValueGlobalCoordinates;
+        xp[idx++] = deformationValueGlobalCoordinates[i];
+      }
+
+      for (size_t i = 0; i<embeddedBlocksize0; i++)
+        for (size_t j = 0; j<gridDim; j++) {
+          aDerivative[i][j] <<= deformationDerivative[i][j];
+          xp[idx++] = deformationDerivative[i][j];
+        }
+
+      for (size_t i = 0; i<embeddedBlocksize1; i++)
+      {
+        aOrientationValueGlobalCoordinates[i] <<= orientationValueGlobalCoordinates[i];
+        aValue[_1] = aOrientationValueGlobalCoordinates;
+        xp[idx++] = orientationValueGlobalCoordinates[i];
+      }
+
+      for (size_t i = 0; i<embeddedBlocksize1; i++)
+        for (size_t j = 0; j<gridDim; j++) {
+          aDerivative[embeddedBlocksize0+i][j] <<= orientationDerivative[i][j];
+          xp[idx++] = orientationDerivative[i][j];
+        }
+
+      adouble density = (*localDensity_)(x, aValue.globalCoordinates(), aDerivative);
+
+      double pureDensity;
+      density >>= pureDensity;
+
+      trace_off();
+
+      // TODO: Remove this reverse call just for the gradient
+      gradient(tapeNumber,m,xp.data(),derivativeDensity.data());
+
+      double* hessianDensityRows[m];
+      for (int i=0; i<m; i++)
+        hessianDensityRows[i] = hessianDensity[i].data();
+
+      hess_mat(tapeNumber,m,m,xp.data(),(double**)densityTangent_,hessianDensityRows);
+    }
+
+  };
+
+  template<class Basis, class LocalInterpolationRule, class ... FactorSpaces>
+  void LocalIntegralStiffness<Basis, LocalInterpolationRule, ProductManifold<FactorSpaces...> >::
+  assembleGradientAndHessian(const typename Basis::LocalView& localView,
+                             const typename Impl::LocalStiffnessTypes<TargetSpace>::CompositeCoefficients& localCoefficients,
+                             std::vector<double>& localGradient,
+                             typename Impl::LocalStiffnessTypes<TargetSpace>::CompositeHessian& localHessian) const
+  {
+    using namespace Dune::Indices;
+
+    using DeformationLocalInterpolationRule = typename std::tuple_element<0,LocalInterpolationRule>::type;
+    using OrientationLocalInterpolationRule = typename std::tuple_element<1,LocalInterpolationRule>::type;
+
+    DeformationLocalInterpolationRule localDeformationGFEFunction(localView.tree().child(_0,0).finiteElement(),localCoefficients[_0]);
+    OrientationLocalInterpolationRule localOrientationGFEFunction(localView.tree().child(_1,0).finiteElement(),localCoefficients[_1]);
+
+    InterpolationDerivatives<DeformationLocalInterpolationRule> deformationInterpolationDerivatives(localDeformationGFEFunction,
+                                                                                                    localDensity_->dependsOnValue(0),
+                                                                                                    localDensity_->dependsOnDerivative(0));
+    InterpolationDerivatives<OrientationLocalInterpolationRule> orientationInterpolationDerivatives(localOrientationGFEFunction,
+                                                                                                    localDensity_->dependsOnValue(1),
+                                                                                                    localDensity_->dependsOnDerivative(1));
+
+    size_t nDofs0 = localCoefficients[_0].size();
+    size_t nDofs1 = localCoefficients[_1].size();
+    const size_t n0 = nDofs0 * embeddedBlocksize0;
+    const size_t n1 = nDofs1 * embeddedBlocksize1;
+
+    // Precompute the orthonormal frames
+    TupleVector<std::vector<FieldMatrix<double,blocksize0,embeddedBlocksize0> >,
+        std::vector<FieldMatrix<double,blocksize1,embeddedBlocksize1> > > orthonormalFrames;
+
+    orthonormalFrames[_0].resize(localCoefficients[_0].size());
+    for (size_t i=0; i<localCoefficients[_0].size(); ++i)
+      orthonormalFrames[_0][i] = localCoefficients[_0][i].orthonormalFrame();
+
+    orthonormalFrames[_1].resize(localCoefficients[_1].size());
+    for (size_t i=0; i<localCoefficients[_1].size(); ++i)
+      orthonormalFrames[_1][i] = localCoefficients[_1][i].orthonormalFrame();
+
+    localGradient.resize(nDofs0*blocksize0 + nDofs1*blocksize1);
+    std::fill(localGradient.begin(), localGradient.end(), 0.0);
+
+    std::vector<typename TargetSpace0::EmbeddedTangentVector> localEmbeddedGradient0(nDofs0);
+    std::vector<typename TargetSpace1::EmbeddedTangentVector> localEmbeddedGradient1(nDofs1);
+    std::fill(localEmbeddedGradient0.begin(), localEmbeddedGradient0.end(), 0.0);
+    std::fill(localEmbeddedGradient1.begin(), localEmbeddedGradient1.end(), 0.0);
+
+    localHessian[0][0].setSize(nDofs0*blocksize0, nDofs0*blocksize0);
+    localHessian[0][1].setSize(nDofs0*blocksize0, nDofs1*blocksize1);
+    localHessian[1][0].setSize(nDofs1*blocksize1, nDofs0*blocksize0);
+    localHessian[1][1].setSize(nDofs1*blocksize1, nDofs1*blocksize1);
+    for (std::size_t i=0; i<localHessian.N(); i++)
+      for (std::size_t j=0; j<localHessian.M(); j++)
+        localHessian[i][j] = 0.0;
+
+    const auto& deformationLocalFiniteElement = localView.tree().child(_0,0).finiteElement();
+
+    const auto& element = localView.element();
+
+    int quadOrder = (element.type().isSimplex()) ? deformationLocalFiniteElement.localBasis().order()
+                                                 : deformationLocalFiniteElement.localBasis().order() * gridDim;
+
+    const auto& quad = QuadratureRules<double, gridDim>::rule(element.type(), quadOrder);
+
+    Matrix<double> deformationInterpolationGradient(m0, n0);
+    Matrix<double> deformationInterpolationGradientShort(m0, nDofs0*blocksize0);
+
+    Matrix<FieldMatrix<double,blocksize0,blocksize0> > deformationInterpolationHessian(nDofs0,nDofs0);
+
+    // TODO: Rename this to riemannianSomething
+    Matrix<double> orientationInterpolationGradient(m1, n1);
+    Matrix<double> orientationInterpolationGradientShort(m1, nDofs1*blocksize1);
+
+    Matrix<FieldMatrix<double,blocksize1,blocksize1> > orientationInterpolationHessian(nDofs1,nDofs1);
+
+    Matrix<double> hessianDensity(m,m);
+
+    for (const auto& qp : quad)
+    {
+      typename TargetSpace0::CoordinateType deformationValueGlobalCoordinates;
+      typename TargetSpace1::CoordinateType orientationValueGlobalCoordinates;
+
+      TargetSpace0DerivativeType deformationDerivative;
+      TargetSpace1DerivativeType orientationDerivative;
+
+      // We use even numbers for the FE interpolation, and odd numbers for the integral density.
+      const std::size_t numberOfFactorSpaces = 2;
+      int densityTapeNumber     = (numberOfFactorSpaces+1)*MPIHelper::getCommunication().rank();
+      int deformationTapeNumber = (numberOfFactorSpaces+1)*MPIHelper::getCommunication().rank()+1;
+      int orientationTapeNumber = (numberOfFactorSpaces+1)*MPIHelper::getCommunication().rank()+2;
+
+      // Evaluate the FE-functions and their derivatives with respect to the evaluation point
+      // at the current quadrature point
+      deformationInterpolationDerivatives.bind(deformationTapeNumber,
+                                               localView.element(),
+                                               qp.position(),
+                                               deformationValueGlobalCoordinates,
+                                               deformationDerivative);
+
+      orientationInterpolationDerivatives.bind(orientationTapeNumber,
+                                               localView.element(),
+                                               qp.position(),
+                                               orientationValueGlobalCoordinates,
+                                               orientationDerivative);
+
+      // Second trace - evaluate the density function for the FE-function value and their derivative at the current QP
+      std::vector<double> densityGradient(m);
+      evaluateDensity(densityTapeNumber,
+                      localView,
+                      qp,
+                      deformationValueGlobalCoordinates,
+                      orientationValueGlobalCoordinates,
+                      deformationDerivative,
+                      orientationDerivative,
+                      densityGradient,
+                      hessianDensity);
+
+      // Multiply the gradient and Hesse matrix of the density by the quadrature weight
+      // and the integration element.  This is the cheapest place to put this multiplication.
+      const auto integrationElement = element.geometry().integrationElement(qp.position());
+
+      for (auto& g : densityGradient)
+        g *= qp.weight() * integrationElement;
+      hessianDensity *= qp.weight() * integrationElement;
+
+      // Compute the derivatives of the GFE interpolation function
+      const int deformationOffset = 0;
+      const int orientationOffset = m0;
+
+      deformationInterpolationDerivatives.evaluateDerivatives(deformationTapeNumber,
+                                                              densityGradient.data() + deformationOffset,
+                                                              deformationInterpolationGradient,
+                                                              deformationInterpolationGradientShort,
+                                                              deformationInterpolationHessian);
+
+      orientationInterpolationDerivatives.evaluateDerivatives(orientationTapeNumber,
+                                                              densityGradient.data() + orientationOffset,
+                                                              orientationInterpolationGradient,
+                                                              orientationInterpolationGradientShort,
+                                                              orientationInterpolationHessian);
+
+      // Chain rule: Multiply the derivative of the density with the derivative of the evaluation to get the total gradient, embedded
+      // Store the Euclidean gradient for the conversion from Euclidean Hesse matrix to Riemannian Hesse matrix
+      for (size_t i = 0; i < nDofs0; i++)
+        for (size_t ii=0; ii<embeddedBlocksize0; ++ii)
+          for (size_t j = 0; j < m0; j++)
+            localEmbeddedGradient0[i][ii] += densityGradient[j] * deformationInterpolationGradient[j][i*embeddedBlocksize0+ii];
+
+      for (size_t i = 0; i < nDofs1; i++)
+        for (size_t ii=0; ii<embeddedBlocksize1; ++ii)
+          for (size_t j = 0; j < m1; j++)
+            localEmbeddedGradient1[i][ii] += densityGradient[m0 + j] * orientationInterpolationGradient[j][i*embeddedBlocksize1+ii];
+
+      for (size_t i = 0; i < nDofs0*blocksize0; i++)
+        for (size_t j = 0; j < m0; j++)
+          localGradient[i] += densityGradient[j] * deformationInterpolationGradientShort[j][i];
+
+      for (size_t i = 0; i < nDofs1*blocksize1; i++)
+        for (size_t j = 0; j < m1; j++)
+          localGradient[nDofs0*blocksize0 + i] += densityGradient[m0 + j] * orientationInterpolationGradientShort[j][i];
+
+      ///////////////////////////////////////////////////////////////////////
+      //  Chain rule to construct Hessian
+      ///////////////////////////////////////////////////////////////////////
+
+      // Part one: derivative of the FE interpolation times Hessian of the density times derivative of the FE interpolation
+      // ------------------------------------------------------------------------------------------------------------------
+
+      // The range of input variables that the density depends on
+      const size_t begin0 = (localDensity_->dependsOnValue(0)) ? 0 : TargetSpace0::CoordinateType::dimension;
+      const size_t end0 = (localDensity_->dependsOnDerivative(0)) ? m0 : TargetSpace0::CoordinateType::dimension;
+
+      const size_t begin1 = (localDensity_->dependsOnValue(1)) ? 0 : TargetSpace1::CoordinateType::dimension;
+      const size_t end1 = (localDensity_->dependsOnDerivative(1)) ? m1 : TargetSpace0::CoordinateType::dimension;
+
+      // tmp00 = hessianDensity[_0][_0] * deformationInterpolationGradientShort
+      Matrix<double> tmp00(m0,nDofs0*blocksize0);
+      tmp00 = 0;
+
+      for (size_t k = begin0; k < end0; k++)
+        for (size_t j = 0; j < nDofs0*blocksize0; j++)
+          for (size_t kk = begin0; kk < end0; kk++)
+            tmp00[k][j] += hessianDensity[k][kk] * deformationInterpolationGradientShort[kk][j];
+
+      // localHessian[_0][_0] += deformationInterpolationGradientShort^T * tmp00
+      for (size_t k = begin0; k < end0; k++)
+        for (size_t i = 0; i < nDofs0*blocksize0; i++)
+          for (size_t j = 0; j <= i; j++)
+            localHessian[_0][_0][i][j] += deformationInterpolationGradientShort[k][i] * tmp00[k][j];
+
+      // tmp10 = hessianDensity[_1][_0] * deformationInterpolationGradientShort
+      Matrix<double> tmp10(m1,nDofs0*blocksize0);
+      tmp10 = 0;
+
+      for (size_t k = begin1; k < end1; k++)
+        for (size_t j = 0; j < nDofs0*blocksize0; j++)
+          for (size_t kk = begin0; kk < end0; kk++)
+            tmp10[k][j] += hessianDensity[k+m0][kk] * deformationInterpolationGradientShort[kk][j];
+
+      // localHessian[_1][_0] += orientationInterpolationGradientShort^T * tmp10
+      for (size_t k = begin1; k < end1; k++)
+        for (size_t i = 0; i < nDofs1*blocksize1; i++)
+          for (size_t j = 0; j < nDofs0*blocksize0; j++)
+            localHessian[_1][_0][i][j] += orientationInterpolationGradientShort[k][i] * tmp10[k][j];
+
+      // tmp11 = hessianDensity[_1][_1] * orientationInterpolationGradientShort
+      Matrix<double> tmp11(m1,nDofs1*blocksize1);
+      tmp11 = 0;
+
+      for (size_t k = begin1; k < end1; k++)
+        for (size_t j = 0; j < nDofs1*blocksize1; j++)
+          for (size_t kk = begin1; kk < end1; kk++)
+            tmp11[k][j] += hessianDensity[k+m0][kk+m0] * orientationInterpolationGradientShort[kk][j];
+
+      // localHessian[_1][_1] += orientationInterpolationGradientShort^T * tmp11
+      // (lower-left triangle only)
+      for (size_t k = begin1; k < end1; k++)
+        for (size_t i = 0; i < nDofs1*blocksize1; i++)
+          for (size_t j = 0; j <= i; j++)
+            localHessian[_1][_1][i][j] += orientationInterpolationGradientShort[k][i] * tmp11[k][j];
+
+      // Part two: Gradient of the density times second derivative of the FE interpolation
+      // ---------------------------------------------------------------------------------
+
+      for (std::size_t i=0; i<nDofs0; i++)
+        for (std::size_t j=0; j<nDofs0; j++)
+          for (std::size_t ii=0; ii<blocksize0; ii++)
+            for (std::size_t jj=0; jj<blocksize0; jj++)
+              localHessian[0][0][i*blocksize0+ii][j*blocksize0+jj] += deformationInterpolationHessian[i][j][ii][jj];
+
+      for (std::size_t i=0; i<nDofs1; i++)
+        for (std::size_t j=0; j<nDofs1; j++)
+          for (std::size_t ii=0; ii<blocksize1; ii++)
+            for (std::size_t jj=0; jj<blocksize1; jj++)
+              localHessian[1][1][i*blocksize1+ii][j*blocksize1+jj] += orientationInterpolationHessian[i][j][ii][jj];
+
+    } // Loop over the current quadrature point
+
+    // Copy the lower-left triangle to the upper-right triangle
+    for (size_t i = 0; i < nDofs0*blocksize0; i++)
+      for (size_t j = 0; j <= i; j++)
+        localHessian[_0][_0][j][i] = localHessian[_0][_0][i][j];
+
+    for (size_t i = 0; i < nDofs1*blocksize1; i++)
+      for (size_t j = 0; j < nDofs0*blocksize0; j++)
+        localHessian[_0][_1][j][i] = localHessian[_1][_0][i][j];
+
+    for (size_t i = 0; i < nDofs1*blocksize1; i++)
+      for (size_t j = 0; j <= i; j++)
+        localHessian[_1][_1][j][i] = localHessian[_1][_1][i][j];
+
+    //////////////////////////////////////////////////////////////////////////////////////
+    //  From this, compute the Hessian with respect to the manifold
+    //  (which we assume here is embedded isometrically in a Euclidean space.
+    //  We add
+    //
+    //       \mathfrak{A}_x(z,P^\orth_x \partial f)
+    //
+    //  For the detailed explanation of the following see:
+    //
+    //     Absil, Mahoney, Trumpf, "An extrinsic look at the Riemannian Hessian".
+    //////////////////////////////////////////////////////////////////////////////////////
+
+    // Project embedded gradient onto normal space
+    std::vector<typename TargetSpace0::EmbeddedTangentVector> projectedGradient0(nDofs0);
+    for (size_t i=0; i<nDofs0; i++)
+      projectedGradient0[i] = localCoefficients[_0][i].projectOntoNormalSpace(localEmbeddedGradient0[i]);
+
+    std::vector<typename TargetSpace1::EmbeddedTangentVector> projectedGradient1(nDofs1);
+    for (size_t i=0; i<nDofs1; i++)
+      projectedGradient1[i] = localCoefficients[_1][i].projectOntoNormalSpace(localEmbeddedGradient1[i]);
+
+    // The Weingarten map has only diagonal entries
+    // TODO: Can we make this quicker if TargetSpace is a RealTuple?
+    for (size_t row=0; row<nDofs0; row++) {
+
+      for (size_t subRow=0; subRow<blocksize0; subRow++) {
+
+        typename TargetSpace0::EmbeddedTangentVector z = orthonormalFrames[_0][row][subRow];
+        typename TargetSpace0::EmbeddedTangentVector tmp1 = localCoefficients[_0][row].weingarten(z,projectedGradient0[row]);
+
+        typename TargetSpace0::TangentVector tmp2;
+        orthonormalFrames[_0][row].mv(tmp1,tmp2);
+
+        for (size_t subCol=0; subCol<blocksize0; subCol++)
+          localHessian[0][0][row*blocksize0+subRow][row*blocksize0+subCol] += tmp2[subCol];
+      }
+
+    }
+
+    for (size_t row=0; row<nDofs1; row++) {
+
+      for (size_t subRow=0; subRow<blocksize1; subRow++) {
+
+        typename TargetSpace1::EmbeddedTangentVector z = orthonormalFrames[_1][row][subRow];
+        typename TargetSpace1::EmbeddedTangentVector tmp1 = localCoefficients[_1][row].weingarten(z,projectedGradient1[row]);
+
+        typename TargetSpace1::TangentVector tmp2;
+        orthonormalFrames[_1][row].mv(tmp1,tmp2);
+
+        for (size_t subCol=0; subCol<blocksize1; subCol++)
+          localHessian[1][1][row*blocksize1+subRow][row*blocksize1+subCol] += tmp2[subCol];
+      }
+
+    }
+
+  }
+
+} // namespace Dune::GFE
+
+#endif
diff --git a/dune/gfe/interpolationderivatives.hh b/dune/gfe/interpolationderivatives.hh
new file mode 100644
index 0000000000000000000000000000000000000000..f4b1d97d8f6c3c11936183bb4493f4831e5907ac
--- /dev/null
+++ b/dune/gfe/interpolationderivatives.hh
@@ -0,0 +1,700 @@
+#ifndef DUNE_GFE_INTERPOLATIONDERIVATIVES_HH
+#define DUNE_GFE_INTERPOLATIONDERIVATIVES_HH
+
+// Includes for the ADOL-C automatic differentiation library
+#include <adolc/adolc.h>
+
+#include <dune/matrix-vector/transpose.hh>
+
+#include <dune/fufem/utilities/adolcnamespaceinjections.hh>
+
+#include <dune/gfe/localgeodesicfefunction.hh>
+#include <dune/gfe/localprojectedfefunction.hh>
+#include <dune/gfe/spaces/realtuple.hh>
+
+namespace Dune::GFE
+{
+  /** \brief Compute derivatives of GFE interpolation with respect to the coefficients
+   *
+   * \tparam LocalInterpolationRule The class that implements the interpolation from a set of coefficients
+   *
+   */
+  template <typename LocalInterpolationRule>
+  class InterpolationDerivatives
+  {
+    using TargetSpace = typename LocalInterpolationRule::TargetSpace;
+
+    constexpr static auto blocksize = TargetSpace::TangentVector::dimension;
+    constexpr static auto embeddedBlocksize = TargetSpace::EmbeddedTangentVector::dimension;
+
+    //////////////////////////////////////////////////////////////////////
+    //  Data members
+    //////////////////////////////////////////////////////////////////////
+
+    const LocalInterpolationRule& localInterpolationRule_;
+
+    // Whether derivatives of the interpolation value are to be computed
+    const bool doValue_;
+
+    // Whether derivatives of the derivative of the interpolation
+    // with respect to space are to be computed
+    const bool doDerivative_;
+
+    // TODO: Don't hardcode FieldMatrix
+    std::vector<FieldMatrix<double,blocksize,embeddedBlocksize> > orthonormalFrames_;
+
+    // The coefficient values where we are evaluating the derivatives.
+    // In flattened format, because ADOL-C can accept only that.
+    std::vector<double> localConfigurationFlat_;
+
+    // The tangent vectors
+    double*** Xppp_;
+
+    // Results of hov_wk_forward
+    double*   y_;      // Function values
+    double*** Yppp_;   // First derivatives
+    double*** Zppp_;   // result of Up x H x XPPP
+    double**  Upp_;    // Vector on left-hand side
+
+    size_t numberOfTangents() const
+    {
+      return blocksize * localInterpolationRule_.size();
+    }
+
+    /** \brief Expose a two-index window from a three-index object
+     *
+     * \tparam rows Number of rows of the window
+     * \tparam cols Number of cols of the window
+     * \tparam thirdIndex The value of the third index
+     */
+    template <size_t rows, size_t cols, size_t thirdIndex>
+    class SubmatrixView
+    {
+    public:
+      SubmatrixView(double*** data, size_t blockRow, size_t blockCol)
+        : data_(data), blockRow_(blockRow), blockCol_(blockCol)
+      {}
+
+      /** \brief Access to scalar entries */
+      double& operator()(size_t row, size_t col)
+      {
+        return data_[blockRow_*rows+row][blockCol_*cols+col][thirdIndex];
+      }
+
+      /** \brief Const access to scalar entries */
+      const double& operator()(size_t row, size_t col) const
+      {
+        return data_[blockRow_*rows+row][blockCol_*cols+col][thirdIndex];
+      }
+
+      /** \brief Assignment from a transposed matrix */
+      void transposedAssign(FieldMatrix<double,cols,rows> other)
+      {
+        for (size_t i=0; i<rows; ++i)
+          for (size_t j=0; j<cols; ++j)
+            (*this)(i,j) = other[j][i];
+      }
+
+      /** \brief Matrix multiplication from the right with a transposed matrix
+       */
+      FieldMatrix<double,rows,rows> multiplyTransposed(const FieldMatrix<double,rows,cols>& other)
+      {
+        FieldMatrix<double,rows,rows> result;
+
+        for (size_t i=0; i<rows; ++i)
+          for (size_t j=0; j<rows; ++j)
+          {
+            result[i][j] = 0;
+            for (size_t k=0; k<cols; ++k)
+              result[i][j] += (*this)(i,k) * other[j][k];
+          }
+
+        return result;
+      }
+
+    private:
+      double*** data_;
+      const size_t blockRow_;
+      const size_t blockCol_;
+    };
+
+  public:
+
+    InterpolationDerivatives(const LocalInterpolationRule& localInterpolationRule,
+                             bool doValue,
+                             bool doDerivative)
+      : localInterpolationRule_(localInterpolationRule)
+      , doValue_(doValue)
+      , doDerivative_(doDerivative)
+    {
+      // Precompute the orthonormal frames
+      orthonormalFrames_.resize(localInterpolationRule_.size());
+      for (size_t i=0; i<localInterpolationRule_.size(); ++i)
+        orthonormalFrames_[i] = localInterpolationRule_.coefficient(i).orthonormalFrame();
+
+      // Construct vector containing the configuration
+      localConfigurationFlat_.resize(localInterpolationRule_.size()*embeddedBlocksize);
+
+      for (size_t i=0; i<localInterpolationRule_.size(); i++)
+        for (size_t j=0; j<embeddedBlocksize; j++)
+          localConfigurationFlat_[i*embeddedBlocksize+j] = localInterpolationRule_.coefficient(i).globalCoordinates()[j];
+
+
+      // Various arrays for ADOL-C
+      const int d = 1;   // TODO: What is this?  (ADOL-C calls this "highest derivative degree")
+
+      // Number of dependent variables of GFE interpolation
+      const size_t m = embeddedBlocksize + LocalInterpolationRule::DerivativeType::rows * LocalInterpolationRule::DerivativeType::cols;
+
+      // Number of independent variables of GFE interpolation
+      const size_t n = localInterpolationRule_.size() * embeddedBlocksize;
+
+      // Set up the tangent vectors for ADOL-C
+      // They are given by tangent vectors of TargetSpace.
+      Xppp_ = myalloc3(n,numberOfTangents(),1);   // The tangent vectors
+
+      for (size_t i=0; i<n; i++)
+        for (size_t j=0; j<numberOfTangents(); j++)
+          Xppp_[i][j][0] = 0;
+
+      for (size_t i=0; i<orthonormalFrames_.size(); ++i)
+      {
+        SubmatrixView<embeddedBlocksize,blocksize,0> view(Xppp_,i,i);
+        view.transposedAssign(orthonormalFrames_[i]);
+      }
+
+      // Results of hov_wk_forward
+      y_ = myalloc1(m);                               // Function values
+      Yppp_ = myalloc3(m,numberOfTangents(),1);   // First derivatives
+
+      Zppp_ = myalloc3(numberOfTangents(),n,d+1);   /* result of Up x H x XPPP */
+      Upp_  = myalloc2(m,d+1);     /* vector on left-hand side */
+    }
+
+    ~InterpolationDerivatives()
+    {
+      // Free allocated memory again
+      myfree3(Yppp_);
+      myfree1(y_);
+      myfree3(Xppp_);
+      myfree2(Upp_);
+      myfree3(Zppp_);
+    }
+
+    /** \brief Bind the objects to a particular evaluation point
+     *
+     * In particular, this computes the value of the interpolation function at that point,
+     * and the derivative at that point with respect to space.  The default implementation
+     * uses ADOL-C to tape these evaluations.  That is required for the evaluateDerivatives
+     * method below to be able to compute the derivatives with respect to the coefficients.
+     *
+     *  \param[in]  tapeNumber      Number of the ADOL-C tape to be used
+     *  \param[in]  localPos        Local position where the FE function is evaluated
+     *  \param[out] value           The function value at the local configuration
+     *  \param[out] derivative      The derivative of the interpolation function
+     *                              with respect to the evaluation point
+     */
+    template <typename Element>
+    void bind(short tapeNumber,
+              const Element& element,
+              const typename Element::Geometry::LocalCoordinate& localPos,
+              typename TargetSpace::CoordinateType& valueGlobalCoordinates,
+              typename LocalInterpolationRule::DerivativeType& derivative)
+    {
+      using ATargetSpace = typename TargetSpace::template rebind<adouble>::other;
+      using ALocalInterpolationRule = typename LocalInterpolationRule::template rebind<ATargetSpace>::other;
+
+      const auto geometryJacobianInverse = element.geometry().jacobianInverse(localPos);
+
+      ////////////////////////////////////////////////////////////////////////////////////////
+      //  Tape the FE interpolation and its derivative with respect to the evaluation point.
+      ////////////////////////////////////////////////////////////////////////////////////////
+      trace_on(tapeNumber);
+
+      std::vector<ATargetSpace> localAConfiguration(localInterpolationRule_.size());
+      std::vector<typename ATargetSpace::CoordinateType> aRaw(localInterpolationRule_.size());
+      for (size_t i=0; i<localInterpolationRule_.size(); i++) {
+        typename TargetSpace::CoordinateType raw = localInterpolationRule_.coefficient(i).globalCoordinates();
+        for (size_t j=0; j<raw.size(); j++)
+          aRaw[i][j] <<= raw[j];
+        localAConfiguration[i] = aRaw[i]; // may contain a projection onto M -- needs to be done in adouble
+      }
+
+      // Create the functions, we want to tape the function evaluation and the evaluation of the derivatives
+      const auto& scalarFiniteElement = localInterpolationRule_.localFiniteElement();
+      ALocalInterpolationRule localGFEFunction(scalarFiniteElement,localAConfiguration);
+
+      if (doValue_)
+      {
+        if (doDerivative_)
+        {
+          // Evaluate the function and its derivative with respect to space
+          auto [aValue, aReferenceDerivative] = localGFEFunction.evaluateValueAndDerivative(localPos);
+
+          //... and transfer the function values to global coordinates
+          auto aValueGlobalCoordinates = aValue.globalCoordinates();
+
+          // Tell ADOL-C that the value coordinates are dependent variables
+          for (size_t i = 0; i<valueGlobalCoordinates.size(); i++)
+            aValueGlobalCoordinates[i] >>= valueGlobalCoordinates[i];
+
+          // Evaluate the derivative of the function defined on the actual element - these are in global coordinates already
+          auto aDerivative = aReferenceDerivative * geometryJacobianInverse;
+
+          for (size_t i = 0; i<derivative.rows; i++)
+            for (size_t j = 0; j<derivative.cols; j++)
+              aDerivative[i][j] >>= derivative[i][j];
+        }
+        else
+        {
+          // Evaluate the function
+          auto aValue = localGFEFunction.evaluate(localPos);
+
+          //... and transfer the function values to global coordinates
+          auto aValueGlobalCoordinates = aValue.globalCoordinates();
+
+          // Tell ADOL-C that the value coordinates are dependent variables
+          for (size_t i = 0; i<valueGlobalCoordinates.size(); i++)
+            aValueGlobalCoordinates[i] >>= valueGlobalCoordinates[i];
+        }
+      }
+      else
+      {
+        if (doDerivative_)
+        {
+          // Evaluate the derivative of the local function defined on the reference element
+          const auto aReferenceDerivative = localGFEFunction.evaluateDerivative(localPos);
+
+          // Evaluate the derivative of the function defined on the actual element - these are in global coordinates already
+          auto aDerivative = aReferenceDerivative * geometryJacobianInverse;
+
+          for (size_t i = 0; i<derivative.rows; i++)
+            for (size_t j = 0; j<derivative.cols; j++)
+              aDerivative[i][j] >>= derivative[i][j];
+        }
+        else
+        {
+          // Do nothing
+        }
+      }
+
+      trace_off();
+    }
+
+    /** \brief Compute first and second derivatives of the FE interpolation
+     *
+     * This code assumes that `bind` has been called before.
+     *
+     *  \param[in]  tapeNumber            The tape number to be used by ADOL-C.  Must be the same
+     *                                    that was given to the `bind` method.
+     *  \param[in]  weights               Vector of weights that the second derivative is contracted with
+     *  \param[out] embeddedFirstDerivative       Derivative of the FE interpolation wrt the coefficients
+     *  \param[out] firstDerivative       Derivative of the FE interpolation wrt the coefficients
+     *  \param[out] secondDerivative      Second derivative of the FE interpolation,
+     *                                    contracted with the weight vector
+     */
+    void evaluateDerivatives(short tapeNumber,
+                             const double* weights,
+                             Matrix<double>& euclideanFirstDerivative,
+                             Matrix<double>& firstDerivative,
+                             Matrix<FieldMatrix<double,blocksize,blocksize> >& secondDerivative) const
+    {
+      const size_t nDofs = localInterpolationRule_.size();
+
+      // Number of dependent variables
+      constexpr auto valueSize = embeddedBlocksize;
+      constexpr auto derivativeSize = LocalInterpolationRule::DerivativeType::rows * LocalInterpolationRule::DerivativeType::cols;
+      const size_t m = ((doValue_) ? embeddedBlocksize : 0)
+                       + ((doDerivative_) ? derivativeSize : 0);
+
+      const size_t n = nDofs * embeddedBlocksize;
+
+      // Compute the Jacobian of the interpolation map in coordinates of the embedding space.
+      // This is a single reverse sweep, and hence should relatively cheap.
+
+      // However, first we need to wrap the euclideanFirstDerivative matrix by something ADOL-C can understand.
+      double* embeddedFirstDerivative[euclideanFirstDerivative.N()];
+
+      std::size_t counter = 0;
+      if (doValue_)
+      {
+        for (std::size_t i=0; i<valueSize; i++)
+          embeddedFirstDerivative[counter++] = euclideanFirstDerivative[i].data();
+      }
+      if (doDerivative_)
+      {
+        for (std::size_t i=0; i<derivativeSize; i++)
+          embeddedFirstDerivative[counter++] = euclideanFirstDerivative[valueSize+i].data();
+      }
+
+      // Here is the actual AD reverse sweep
+      jacobian(tapeNumber,
+               m,
+               n,
+               localConfigurationFlat_.data(),
+               embeddedFirstDerivative);
+
+      ////////////////////////////////////////////////////////////////////////////////////////
+      //  Do one forward ADOL-C sweep, using the vector in 'orthonormalFrames' as tangents.
+      //  This achieves two things:
+      //   a) It computes the Jacobian of the interpolation in the coordinates system
+      //      spanned by the orthonormalFrames bases.
+      //   b) It is the first of two steps to compute the second derivatives below.
+      ////////////////////////////////////////////////////////////////////////////////////////
+
+      const int d = 1; // TODO: What is this?  (ADOL-C calls this "highest derivative degree")
+
+      // Vector-mode forward sweep
+      // Disregard the return value.  Apparently it is not an error code.
+      hov_wk_forward(tapeNumber,
+                     m,         // Dimension of the function range space
+                     n,         // Number of independent variables
+                     d,         // ???
+                     2,         // Keep all computed Taylor coefficients for later up to this order
+                     numberOfTangents(),
+                     localConfigurationFlat_.data(), // Where to evaluate the derivative
+                     Xppp_,
+                     y_, // [out] The computed value
+                     Yppp_);    // [out] The computed Jacobian
+
+      if (doValue_)
+      {
+        for (size_t i=0; i<m; i++)
+          for (size_t j=0; j<numberOfTangents(); j++)
+            firstDerivative[i][j] = Yppp_[i][j][0];
+      }
+      else
+      {
+        for (size_t i=0; i<m; i++)
+          for (size_t j=0; j<numberOfTangents(); j++)
+            firstDerivative[i+valueSize][j] = Yppp_[i][j][0];
+      }
+
+      ///////////////////////////////////////////////////////////////////////////
+      //  Do a reverse sweep to compute the second derivative
+      ///////////////////////////////////////////////////////////////////////////
+
+      if (doValue_)
+      {
+        for (size_t i=0; i<m; i++)
+        {
+          Upp_[i][0] = weights[i];
+          Upp_[i][1] = 0;
+        }
+      }
+      else
+      {
+        for (size_t i=0; i<m; i++)
+        {
+          Upp_[i][0] = weights[i+valueSize];
+          Upp_[i][1] = 0;
+        }
+      }
+
+      // Scalar-mode reverse sweep
+      // Scalar-mode is sufficient, because we have only one vector of weights.
+      hos_ov_reverse(tapeNumber,
+                     m,   // Number of dependent variables
+                     n,   // Number of independent variables
+                     d,   // d?  Highest derivative degree?
+                     numberOfTangents(),   // Number of tangent vectors used in the previous forward sweep
+                     Upp_,
+                     Zppp_);
+
+      ////////////////////////////////////////////////////////////////////////////////////
+      //  Multiply from the right with the transposed orthonormal frames.
+      //  ADOL-C doesn't do this for us, we have to do it by hand.
+      ////////////////////////////////////////////////////////////////////////////////////
+
+      for (size_t col=0; col<nDofs; col++)
+      {
+        for (size_t row=0; row<nDofs; row++)
+        {
+          SubmatrixView<blocksize,embeddedBlocksize,1> view(Zppp_,row,col);
+          secondDerivative[row][col] = view.multiplyTransposed(orthonormalFrames_[col]);
+        }
+      }
+    }
+
+  };
+
+  /** \brief Compute derivatives of GFE interpolation to RealTuple with respect to the coefficients
+   *
+   * This is the specialization of the InterpolationDerivatives class for the RealTuple target space.
+   * Since RealTuple models the standard Euclidean space, geodesic FE interpolation reduces to
+   * standard FE interpolation, and the derivatives with respect to the coefficients can be
+   * computed much simpler and faster than for the general case.
+   */
+  template <int gridDim, typename field_type, typename LocalFiniteElement,int dim>
+  class InterpolationDerivatives<LocalGeodesicFEFunction<gridDim, field_type, LocalFiniteElement, RealTuple<field_type,dim> > >
+  {
+    using LocalInterpolationRule = LocalGeodesicFEFunction<gridDim, field_type, LocalFiniteElement, RealTuple<field_type,dim> >;
+    using TargetSpace = typename LocalInterpolationRule::TargetSpace;
+
+    constexpr static auto blocksize = TargetSpace::TangentVector::dimension;
+
+    //////////////////////////////////////////////////////////////////////
+    //  Data members
+    //////////////////////////////////////////////////////////////////////
+
+    const LocalInterpolationRule& localInterpolationRule_;
+
+    // Whether derivatives of the interpolation value are to be computed
+    const bool doValue_;
+
+    // Whether derivatives of the derivative of the interpolation
+    // with respect to space are to be computed
+    const bool doDerivative_;
+
+    // Values of all scalar shape functions at the point we are bound to
+    std::vector<FieldVector<double,1> > shapeFunctionValues_;
+
+    // Gradients of all scalar shape functions at the point we are bound to
+    // TODO: The second dimension must be WorldDim
+    std::vector<FieldMatrix<double,1,gridDim> > shapeFunctionGradients_;
+
+
+  public:
+
+    InterpolationDerivatives(const LocalInterpolationRule& localInterpolationRule,
+                             bool doValue,
+                             bool doDerivative)
+      : localInterpolationRule_(localInterpolationRule)
+      , doValue_(doValue)
+      , doDerivative_(doDerivative)
+    {}
+
+    /** \brief Bind the objects to a particular evaluation point
+     *
+     * In particular, this computes the value of the interpolation function at that point,
+     * and the derivative at that point with respect to space.  The default implementation
+     * uses ADOL-C to tape these evaluations.  That is required for the evaluateDerivatives
+     * method below to be able to compute the derivatives with respect to the coefficients.
+     *
+     *  \param[in]  tapeNumber      Number of the ADOL-C tape, not used by this specialization
+     *  \param[in]  localPos        Local position where the FE function is evaluated
+     *  \param[out] value           The function value at the local configuration
+     *  \param[out] derivative      The derivative of the interpolation function
+     *                              with respect to the evaluation point
+     */
+    template <typename Element>
+    void bind(short tapeNumber,
+              const Element& element,
+              const typename Element::Geometry::LocalCoordinate& localPos,
+              typename TargetSpace::CoordinateType& valueGlobalCoordinates,
+              typename LocalInterpolationRule::DerivativeType& derivative)
+    {
+      const auto geometryJacobianInverse = element.geometry().jacobianInverse(localPos);
+
+      const auto& scalarFiniteElement = localInterpolationRule_.localFiniteElement();
+      const auto& localBasis = scalarFiniteElement.localBasis();
+
+      // Get shape function values
+      localBasis.evaluateFunction(localPos, shapeFunctionValues_);
+
+      // Get shape function Jacobians
+      localBasis.evaluateJacobian(localPos, shapeFunctionGradients_);
+
+      for (auto& gradient : shapeFunctionGradients_)
+        gradient = gradient * geometryJacobianInverse;
+
+      std::fill(valueGlobalCoordinates.begin(), valueGlobalCoordinates.end(), 0.0);
+      for (size_t i=0; i<shapeFunctionValues_.size(); i++)
+        valueGlobalCoordinates.axpy(shapeFunctionValues_[i][0],
+                                    localInterpolationRule_.coefficient(i).globalCoordinates());
+
+      // Derivatives
+      for (size_t i=0; i<localInterpolationRule_.size(); i++)
+        for (int j=0; j<dim; j++)
+          derivative[j].axpy(localInterpolationRule_.coefficient(i).globalCoordinates()[j],
+                             shapeFunctionGradients_[i][0]);
+    }
+
+    /** \brief Compute first and second derivatives of the FE interpolation
+     *
+     * This code assumes that `bind` has been called before.
+     *
+     *  \param[in]  tapeNumber            The tape number to be used by ADOL-C.  Must be the same
+     *                                    that was given to the `bind` method.
+     *  \param[in]  weights               Vector of weights that the second derivative is contracted with
+     *  \param[out] embeddedFirstDerivative       Derivative of the FE interpolation wrt the coefficients
+     *  \param[out] firstDerivative       Derivative of the FE interpolation wrt the coefficients
+     *  \param[out] secondDerivative      Second derivative of the FE interpolation,
+     *                                    contracted with the weight vector
+     */
+    void evaluateDerivatives(short tapeNumber,
+                             const double* weights,
+                             Matrix<double>& embeddedFirstDerivative,
+                             Matrix<double>& firstDerivative,
+                             Matrix<FieldMatrix<double,blocksize,blocksize> >& secondDerivative) const
+    {
+      const size_t nDofs = localInterpolationRule_.size();
+
+      ////////////////////////////////////////////////////////////////////
+      //  The first derivative of the finite element interpolation
+      ////////////////////////////////////////////////////////////////////
+
+      firstDerivative = 0.0;
+
+      // First derivatives of the function value wrt to the FE coefficients
+      for (size_t i=0; i<nDofs; ++i)
+        for (int j=0; j<blocksize; ++j)
+          firstDerivative[j][i*blocksize+j] = shapeFunctionValues_[i][0];
+
+      // First derivatives of the function gradient wrt to the FE coefficients
+      for (size_t i=0; i<nDofs; ++i)
+        for (int j=0; j<blocksize; ++j)
+          for (int k=0; k<gridDim; ++k)
+            firstDerivative[blocksize + j*gridDim + k][i*blocksize+j] = shapeFunctionGradients_[i][0][k];
+
+      // For RealTuple, firstDerivative and embeddedFirstDerivative coincide
+      embeddedFirstDerivative = firstDerivative;
+
+      ////////////////////////////////////////////////////////////////////
+      //  The second derivative of the finite element interpolation
+      //  For RealTuple objects, all second derivatives are zero
+      ////////////////////////////////////////////////////////////////////
+      secondDerivative = 0;
+    }
+  };
+
+  /** \brief Compute derivatives of GFE interpolation to RealTuple with respect to the coefficients
+   *
+   * This is the specialization of the InterpolationDerivatives class for the RealTuple target space
+   * and the LocalProjectedGFEFunction interpolation.
+   * Since RealTuple models the standard Euclidean space, projection-based FE interpolation reduces to
+   * standard FE interpolation, and the derivatives with respect to the coefficients can be
+   * computed much simpler and faster than for the general case.
+   */
+  template <int gridDim, typename field_type, typename LocalFiniteElement,int dim>
+  class InterpolationDerivatives<LocalProjectedFEFunction<gridDim, field_type, LocalFiniteElement, RealTuple<field_type,dim> > >
+  {
+    // TODO: The implementation here would be identical to the geodesic FE case
+    using LocalInterpolationRule = LocalProjectedFEFunction<gridDim, field_type, LocalFiniteElement, RealTuple<field_type,dim> >;
+    using TargetSpace = typename LocalInterpolationRule::TargetSpace;
+
+    constexpr static auto blocksize = TargetSpace::TangentVector::dimension;
+
+    //////////////////////////////////////////////////////////////////////
+    //  Data members
+    //////////////////////////////////////////////////////////////////////
+
+    const LocalInterpolationRule& localInterpolationRule_;
+
+    // Whether derivatives of the interpolation value are to be computed
+    const bool doValue_;
+
+    // Whether derivatives of the derivative of the interpolation
+    // with respect to space are to be computed
+    const bool doDerivative_;
+
+    // Values of all scalar shape functions at the point we are bound to
+    std::vector<FieldVector<double,1> > shapeFunctionValues_;
+
+    // Gradients of all scalar shape functions at the point we are bound to
+    // TODO: The second dimension must be WorldDim
+    std::vector<FieldMatrix<double,1,gridDim> > shapeFunctionGradients_;
+
+  public:
+    InterpolationDerivatives(const LocalInterpolationRule& localInterpolationRule,
+                             bool doValue, bool doDerivative)
+      : localInterpolationRule_(localInterpolationRule)
+      , doValue_(doValue)
+      , doDerivative_(doDerivative)
+    {}
+
+    /** \brief Bind the objects to a particular evaluation point
+     *
+     * In particular, this computes the value of the interpolation function at that point,
+     * and the derivative at that point with respect to space.  The default implementation
+     * uses ADOL-C to tape these evaluations.  That is required for the evaluateDerivatives
+     * method below to be able to compute the derivatives with respect to the coefficients.
+     *
+     *  \param[in]  tapeNumber      Number of the ADOL-C tape, not used by this specialization
+     *  \param[in]  localPos        Local position where the FE function is evaluated
+     *  \param[out] value           The function value at the local configuration
+     *  \param[out] derivative      The derivative of the interpolation function
+     *                              with respect to the evaluation point
+     */
+    template <typename Element>
+    void bind(short tapeNumber,
+              const Element& element,
+              const typename Element::Geometry::LocalCoordinate& localPos,
+              typename TargetSpace::CoordinateType& valueGlobalCoordinates,
+              typename LocalInterpolationRule::DerivativeType& derivative)
+    {
+      const auto geometryJacobianInverse = element.geometry().jacobianInverse(localPos);
+
+      const auto& scalarFiniteElement = localInterpolationRule_.localFiniteElement();
+      const auto& localBasis = scalarFiniteElement.localBasis();
+
+      // Get shape function values
+      localBasis.evaluateFunction(localPos, shapeFunctionValues_);
+
+      // Get shape function Jacobians
+      localBasis.evaluateJacobian(localPos, shapeFunctionGradients_);
+
+      for (auto& gradient : shapeFunctionGradients_)
+        gradient = gradient * geometryJacobianInverse;
+
+      std::fill(valueGlobalCoordinates.begin(), valueGlobalCoordinates.end(), 0.0);
+      for (size_t i=0; i<shapeFunctionValues_.size(); i++)
+        valueGlobalCoordinates.axpy(shapeFunctionValues_[i][0],
+                                    localInterpolationRule_.coefficient(i).globalCoordinates());
+
+      // Derivatives
+      for (size_t i=0; i<localInterpolationRule_.size(); i++)
+        for (int j=0; j<dim; j++)
+          derivative[j].axpy(localInterpolationRule_.coefficient(i).globalCoordinates()[j],
+                             shapeFunctionGradients_[i][0]);
+    }
+
+    /** \brief Compute first and second derivatives of the FE interpolation
+     *
+     * This code assumes that `bind` has been called before.
+     *
+     *  \param[in]  tapeNumber            The tape number to be used by ADOL-C. Not used by this specialization
+     *  \param[in]  weights               Vector of weights that the second derivative is contracted with
+     *  \param[out] embeddedFirstDerivative       Derivative of the FE interpolation wrt the coefficients
+     *  \param[out] firstDerivative       Derivative of the FE interpolation wrt the coefficients
+     *  \param[out] secondDerivative      Second derivative of the FE interpolation,
+     *                                    contracted with the weight vector
+     */
+    void evaluateDerivatives(short tapeNumber,
+                             const double* weights,
+                             Matrix<double>& embeddedFirstDerivative,
+                             Matrix<double>& firstDerivative,
+                             Matrix<FieldMatrix<double,blocksize,blocksize> >& secondDerivative) const
+    {
+      const size_t nDofs = localInterpolationRule_.size();
+
+      ////////////////////////////////////////////////////////////////////
+      //  The first derivative of the finite element interpolation
+      ////////////////////////////////////////////////////////////////////
+
+      firstDerivative = 0.0;
+
+      // First derivatives of the function value wrt to the FE coefficients
+      for (size_t i=0; i<nDofs; ++i)
+        for (int j=0; j<blocksize; ++j)
+          firstDerivative[j][i*blocksize+j] = shapeFunctionValues_[i][0];
+
+      // First derivatives of the function gradient wrt to the FE coefficients
+      for (size_t i=0; i<nDofs; ++i)
+        for (int j=0; j<blocksize; ++j)
+          for (int k=0; k<gridDim; ++k)
+            firstDerivative[blocksize + j*gridDim + k][i*blocksize+j] = shapeFunctionGradients_[i][0][k];
+
+      // For RealTuple, firstDerivative and embeddedFirstDerivative coincide
+      embeddedFirstDerivative = firstDerivative;
+
+      ////////////////////////////////////////////////////////////////////
+      //  The second derivative of the finite element interpolation
+      //  For RealTuple objects, all second derivatives are zero
+      ////////////////////////////////////////////////////////////////////
+      secondDerivative = 0;
+    }
+  };
+}  // namespace Dune::GFE
+
+#endif
diff --git a/dune/gfe/localgeodesicfefunction.hh b/dune/gfe/localgeodesicfefunction.hh
index 728d56acb68e1a261768959fe803468a742273b0..0d4604423171c93aa014856b861601295442b8a6 100644
--- a/dune/gfe/localgeodesicfefunction.hh
+++ b/dune/gfe/localgeodesicfefunction.hh
@@ -28,11 +28,15 @@ class LocalGfeTestFunctionBasis;
    \tparam dim Dimension of the reference element
    \tparam ctype Type used for coordinates on the reference element
    \tparam LocalFiniteElement A Lagrangian finite element whose shape functions define the interpolation weights
-   \tparam TargetSpace The manifold that the function takes its values in
+   \tparam TS TargetSpace: The manifold that the function takes its values in
  */
-template <int dim, class ctype, class LocalFiniteElement, class TargetSpace>
+template <int dim, class ctype, class LocalFiniteElement, class TS>
 class LocalGeodesicFEFunction
 {
+public:
+  using TargetSpace = TS;
+
+private:
   typedef typename TargetSpace::ctype RT;
 
   typedef typename TargetSpace::EmbeddedTangentVector EmbeddedTangentVector;
@@ -80,6 +84,21 @@ public:
     return localFiniteElement_.type();
   }
 
+  /** \brief The scalar finite element used as the interpolation weights
+   *
+   * \note This method was added for InterpolationDerivatives, which needs it
+   * to construct a copy of a LocalGeodesicFEFunction with ADOL-C's adouble
+   * number type.  This is not optimal, because the localFiniteElement
+   * really is an implementation detail of LocalGeodesicFEFunction and
+   * should not be needed just to copy an entire object.  Other non-Euclidean
+   * interpolation rules may not have such a finite element at all.
+   * Therefore, this method may disappear again eventually.
+   */
+  const LocalFiniteElement& localFiniteElement() const
+  {
+    return localFiniteElement_;
+  }
+
   /** \brief Evaluate the function */
   TargetSpace evaluate(const Dune::FieldVector<ctype, dim>& local) const;
 
diff --git a/dune/gfe/localprojectedfefunction.hh b/dune/gfe/localprojectedfefunction.hh
index b04c92974888e1b24645d276e00ae8f1ab1a09dd..40b48fe1374e892218f63850e88d51f862fb6faf 100644
--- a/dune/gfe/localprojectedfefunction.hh
+++ b/dune/gfe/localprojectedfefunction.hh
@@ -72,6 +72,21 @@ namespace Dune {
         return localFiniteElement_.type();
       }
 
+      /** \brief The scalar finite element used as the interpolation weights
+       *
+       * \note This method was added for InterpolationDerivatives, which needs it
+       * to construct a copy of a LocalGeodesicFEFunction with ADOL-C's adouble
+       * number type.  This is not optimal, because the localFiniteElement
+       * really is an implementation detail of LocalGeodesicFEFunction and
+       * should not be needed just to copy an entire object.  Other non-Euclidean
+       * interpolation rules may not have such a finite element at all.
+       * Therefore, this method may disappear again eventually.
+       */
+      const LocalFiniteElement& localFiniteElement() const
+      {
+        return localFiniteElement_;
+      }
+
       /** \brief Evaluate the function */
       auto evaluate(const Dune::FieldVector<ctype, dim>& local) const;
 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index b32a0b3432cf29a957c9b59d9ccbdd84699b8f02..af33f1d7a35458a71d4ad7159f9621e2ecfe0ccf 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -7,6 +7,7 @@ set(TESTS
   localadolcstiffnesstest
   localgeodesicfefunctiontest
   localgfetestfunctiontest
+  localintegralstiffnesstest
   localprojectedfefunctiontest
   orthogonalmatrixtest
   polardecompositiontest
diff --git a/test/localintegralstiffnesstest.cc b/test/localintegralstiffnesstest.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f6887d7bcabb7dad13fbf8538da9c46bdd748350
--- /dev/null
+++ b/test/localintegralstiffnesstest.cc
@@ -0,0 +1,530 @@
+#include <config.h>
+
+// Includes for the ADOL-C automatic differentiation library
+// Need to come before (almost) all others.
+#include <adolc/adouble.h>
+#include <dune/fufem/utilities/adolcnamespaceinjections.hh>
+
+
+#include <dune/common/parametertree.hh>
+#include <dune/common/parallel/mpihelper.hh>
+#include <dune/common/test/testsuite.hh>
+#include <dune/common/timer.hh>
+
+#include <dune/grid/uggrid.hh>
+#include <dune/grid/utility/structuredgridfactory.hh>
+
+#include <dune/functions/functionspacebases/lagrangebasis.hh>
+#include <dune/functions/functionspacebases/compositebasis.hh>
+#include <dune/functions/functionspacebases/powerbasis.hh>
+#include <dune/functions/functionspacebases/interpolate.hh>
+#include <dune/functions/functionspacebases/subspacebasis.hh>
+
+#include <dune/gfe/localprojectedfefunction.hh>
+#include <dune/gfe/assemblers/localgeodesicfeadolcstiffness.hh>
+#include <dune/gfe/assemblers/mixedgfeassembler.hh>
+#include <dune/gfe/assemblers/sumenergy.hh>
+#include <dune/gfe/assemblers/geodesicfeassembler.hh>
+#include <dune/gfe/assemblers/localintegralenergy.hh>
+#include <dune/gfe/assemblers/localintegralstiffness.hh>
+#include <dune/gfe/densities/localdensity.hh>
+#include <dune/gfe/densities/bulkcosseratdensity.hh>
+#include <dune/gfe/densities/harmonicdensity.hh>
+#include <dune/gfe/spaces/realtuple.hh>
+
+// grid dimension
+const int dim = 3;
+
+using namespace Dune;
+using namespace Dune::Indices;
+using namespace Functions::BasisFactory;
+
+enum InterpolationType {Geodesic, ProjectionBased};
+
+
+template <class GridView, InterpolationType interpolationType>
+int testHarmonicMapIntoSphere(TestSuite& test, const GridView& gridView)
+{
+  using TargetSpace = UnitVector<double,dim>;
+
+  // Finite element order
+  const int order = 1;
+
+  const static int blocksize = TargetSpace::TangentVector::dimension;
+
+  using Correction = BlockVector<TargetSpace::TangentVector>;
+  using Matrix = BCRSMatrix<FieldMatrix<double, blocksize, blocksize> >;
+
+  //////////////////////////////////////////////////////////////////////////////////
+  //  Construct all needed function space bases
+  //////////////////////////////////////////////////////////////////////////////////
+
+  auto tangentBasis = makeBasis(
+    gridView,
+    power<blocksize>(
+      lagrange<order>()
+      )
+    );
+
+  auto valueBasis = makeBasis(gridView, power<TargetSpace::CoordinateType::dimension>(lagrange<order>()));
+
+  /////////////////////////////////////////////////////////////////////////
+  //  Construct the configuration where to assemble the tangent problem
+  /////////////////////////////////////////////////////////////////////////
+
+  using CoefficientVector = std::vector<TargetSpace>;
+  CoefficientVector x(tangentBasis.size());
+
+  BlockVector<FieldVector<double,dim> > initialIterate;
+  Functions::interpolate(valueBasis, initialIterate, [](FieldVector<double,dim> x)
+                         -> FieldVector<double,dim> {
+    // Interpret x[0] and x[1] as spherical coordinates
+    auto phi = x[0];
+    auto theta = x[1];
+    return {sin(phi)*cos(theta), sin(phi)*sin(theta), cos(theta)};
+  });
+  for (size_t i = 0; i < x.size(); i++)
+    x[i] = initialIterate[i];
+
+  using ATargetSpace = typename TargetSpace::rebind<adouble>::other;
+
+  // Select which type of geometric interpolation to use
+  // TODO: Use the tangent basis here!
+  using FEBasis = Functions::LagrangeBasis<GridView,order>;
+  Functions::LagrangeBasis<GridView,order> feBasis(gridView);
+
+  //////////////////////////////////////////////////////////////
+  //  Set up the two assemblers
+  //////////////////////////////////////////////////////////////
+
+  std::shared_ptr<GeodesicFEAssembler<FEBasis,TargetSpace> > assemblerADOLC;
+  std::shared_ptr<LocalGeodesicFEStiffness<FEBasis,TargetSpace> > localIntegralStiffness;
+
+  using LocalCoordinate = typename GridView::template Codim<0>::Geometry::LocalCoordinate;
+  auto harmonicDensity = std::make_shared<GFE::HarmonicDensity<LocalCoordinate,ATargetSpace> >();
+
+  if constexpr (interpolationType==Geodesic)
+  {
+    std::cout << "Using geodesic interpolation" << std::endl;
+    using LocalInterpolationRule = LocalGeodesicFEFunction<dim,
+        typename GridView::ctype,
+        decltype(feBasis.localView().tree().finiteElement()),
+        TargetSpace>;
+
+    using ALocalInterpolationRule = LocalGeodesicFEFunction<dim,
+        typename GridView::ctype,
+        decltype(feBasis.localView().tree().finiteElement()),
+        ATargetSpace>;
+
+    // Assemble using the old assembler
+    auto energy = std::make_shared<GFE::LocalIntegralEnergy<FEBasis, ALocalInterpolationRule,ATargetSpace> >(harmonicDensity);
+
+    auto localGFEADOLCStiffness = std::make_shared<LocalGeodesicFEADOLCStiffness<FEBasis,TargetSpace> >(energy);
+    assemblerADOLC = std::make_shared<GeodesicFEAssembler<FEBasis,TargetSpace> >(feBasis, localGFEADOLCStiffness);
+
+    // Assemble using the new assembler
+    localIntegralStiffness = std::make_shared<GFE::LocalIntegralStiffness<FEBasis,LocalInterpolationRule,TargetSpace> >(harmonicDensity);
+  }
+  else
+  {
+    std::cout << "Using projection-based interpolation" << std::endl;
+    using LocalInterpolationRule = GFE::LocalProjectedFEFunction<dim,
+        typename GridView::ctype,
+        decltype(feBasis.localView().tree().finiteElement()),
+        TargetSpace>;
+
+    using ALocalInterpolationRule = GFE::LocalProjectedFEFunction<dim,
+        typename GridView::ctype,
+        decltype(feBasis.localView().tree().finiteElement()),
+        ATargetSpace>;
+
+    // Assemble using the old assembler
+    auto energy = std::make_shared<GFE::LocalIntegralEnergy<FEBasis, ALocalInterpolationRule,ATargetSpace> >(harmonicDensity);
+
+    auto localGFEADOLCStiffness = std::make_shared<LocalGeodesicFEADOLCStiffness<FEBasis,TargetSpace> >(energy);
+    assemblerADOLC = std::make_shared<GeodesicFEAssembler<FEBasis,TargetSpace> >(feBasis, localGFEADOLCStiffness);
+
+    // Assemble using the new assembler
+    localIntegralStiffness = std::make_shared<GFE::LocalIntegralStiffness<FEBasis,LocalInterpolationRule,TargetSpace> >(harmonicDensity);
+  }
+
+  // TODO: The assembler should really get the tangent basis.
+  auto assemblerIntegralStiffness = std::make_shared<GeodesicFEAssembler<FEBasis,TargetSpace> >(feBasis, localIntegralStiffness);
+
+  //////////////////////////////////////////////////////////////
+  //  Assemble
+  //////////////////////////////////////////////////////////////
+
+  Correction gradient;
+  Matrix hesseMatrix;
+
+  Correction gradientSmart;
+  Matrix hesseMatrixSmart;
+
+  Timer assemblerTimer;
+  assemblerADOLC->assembleGradientAndHessian(x, gradient, hesseMatrix,true);
+  std::cout << "assemblerTimer took: " << assemblerTimer.elapsed() << std::endl;
+
+  Timer assemblerSmartTimer;
+  assemblerIntegralStiffness->assembleGradientAndHessian(x, gradientSmart, hesseMatrixSmart,true);
+  std::cout << "assemblerSmartTimer took: " << assemblerSmartTimer.elapsed() << std::endl;
+
+  //////////////////////////////////////////////////////////////
+  //  Compare the results
+  //////////////////////////////////////////////////////////////
+
+  double gradientInfinityNorm = gradient.infinity_norm();
+  double matrixFrobeniusNorm = hesseMatrix.frobenius_norm();
+
+  double gradientSmartInfinityNorm = gradientSmart.infinity_norm();
+  double matrixSmartFrobeniusNorm = hesseMatrixSmart.frobenius_norm();
+
+  if (std::isnan(gradientInfinityNorm) || std::isnan(gradientSmartInfinityNorm) || std::abs(gradientInfinityNorm - gradientSmartInfinityNorm)/gradientInfinityNorm > 1e-3)
+  {
+    std::cerr << std::setprecision(9);
+    std::cerr << "Gradient max norm from localADOLCStiffness is " << gradientInfinityNorm
+              << ", from LocalIntegralStiffness is " << gradientSmartInfinityNorm << " :(" << std::endl;
+    return 1;
+  }
+
+  if (std::isnan(matrixFrobeniusNorm) || std::isnan(matrixSmartFrobeniusNorm) || std::abs(matrixFrobeniusNorm - matrixSmartFrobeniusNorm)/matrixFrobeniusNorm > 1e-3)
+  {
+    std::cerr << std::setprecision(9);
+    std::cerr << "Matrix norm from localADOLCStiffness is " << matrixFrobeniusNorm
+              << ", from LocalIntegralStiffness is " << matrixSmartFrobeniusNorm << " :(" << std::endl;
+    return 1;
+  }
+
+  // TODO: Use rangeFor
+  for (auto rowIt = hesseMatrixSmart.begin(); rowIt != hesseMatrixSmart.end(); ++rowIt)
+    for (auto colIt = rowIt->begin(); colIt != rowIt->end(); ++colIt)
+      for (size_t k = 0; k < blocksize; k++)
+        for (size_t l = 0; l < blocksize; l++)
+          if (std::abs((*colIt)[k][l]) > 1e-6 and std::abs(hesseMatrix[rowIt.index()][colIt.index()][k][l]) > 1e-6
+              and std::abs((*colIt)[k][l] - hesseMatrix[rowIt.index()][colIt.index()][k][l])/(*colIt)[k][l] > 1e-4) {
+            std::cerr << "Matrix: Index " << " " << rowIt.index() << " " << colIt.index() << " " << k << " " << l  << " is wrong: " << hesseMatrix[rowIt.index()][colIt.index()][k][l] << " - " << (*colIt)[k][l] << std::endl;
+            return 1;
+          }
+
+  return 0;
+}
+
+
+
+template <class GridView, InterpolationType interpolationType>
+int testCosseratBulkModel(TestSuite& test, const GridView& gridView)
+{
+  // Order of the approximation spaces
+  // Deliberately test with different orders, to find more bugs
+  const int deformationOrder = 2;
+  const int rotationOrder = 1;
+
+  const static int deformationBlocksize = RealTuple<double,dim>::TangentVector::dimension;
+  const static int orientationBlocksize = Rotation<double,dim>::TangentVector::dimension;
+
+  using CorrectionType0 = BlockVector<FieldVector<double, deformationBlocksize> >;
+  using CorrectionType1 = BlockVector<FieldVector<double, orientationBlocksize> >;
+
+  using MatrixType00 = BCRSMatrix<FieldMatrix<double, deformationBlocksize, deformationBlocksize> >;
+  using MatrixType01 = BCRSMatrix<FieldMatrix<double, deformationBlocksize, orientationBlocksize> >;
+  using MatrixType10 = BCRSMatrix<FieldMatrix<double, orientationBlocksize, deformationBlocksize> >;
+  using MatrixType11 = BCRSMatrix<FieldMatrix<double, orientationBlocksize, orientationBlocksize> >;
+
+  using MatrixType = MultiTypeBlockMatrix<MultiTypeBlockVector<MatrixType00,MatrixType01>,
+      MultiTypeBlockVector<MatrixType10,MatrixType11> >;
+
+  //////////////////////////////////////////////////////////////////////////////////
+  //  Construct all needed function space bases
+  //////////////////////////////////////////////////////////////////////////////////
+
+  const int dimRotation = Rotation<double,dim>::embeddedDim;
+  auto compositeBasis = makeBasis(
+    gridView,
+    composite(
+      power<dim>(
+        lagrange<deformationOrder>()
+        ),
+      power<dimRotation>(
+        lagrange<rotationOrder>()
+        )
+      ));
+
+  using CompositeBasis = decltype(compositeBasis);
+
+  /////////////////////////////////////////////////////////////////////////
+  //  Construct the configuration where to assemble the tangent problem
+  /////////////////////////////////////////////////////////////////////////
+
+  using CoefficientVector = TupleVector<std::vector<RealTuple<double,dim> >,std::vector<Rotation<double,dim> > >;
+  CoefficientVector x;
+  x[_0].resize(compositeBasis.size({0}));
+  x[_1].resize(compositeBasis.size({1}));
+
+  MultiTypeBlockVector<BlockVector<FieldVector<double,dim> >,BlockVector<FieldVector<double,dimRotation> > > initialIterate;
+  Functions::interpolate(Functions::subspaceBasis(compositeBasis, _0), initialIterate, [](FieldVector<double,dim> x){
+    FieldVector<double,dim> y = {0.5*x[0]*x[0], 0.5*x[1] + 0.2*x[0], 4*std::abs(std::sin(x[2]))};
+    return y;
+  });
+  for (size_t i = 0; i < x[_0].size(); i++)
+    x[_0][i] = initialIterate[_0][i];
+
+  Functions::interpolate(Functions::subspaceBasis(compositeBasis, _1), initialIterate, [](FieldVector<double,dim> x){
+    // Just any rotation field, for testing
+    FieldVector<double,4> y = {0.5*x[0], 0.7*x[1], 1.0*std::abs(std::sin(x[2])), 1.0 - 0.2*x[0]*x[1]};
+    return y;
+  });
+  for (size_t i = 0; i < x[_1].size(); i++)
+    x[_1][i] = Rotation<double,dim>(initialIterate[_1][i]);
+
+  // Set of material parameters
+  ParameterTree parameters;
+  parameters["thickness"] = "0.1";
+  parameters["mu"] = "1";
+  parameters["lambda"] = "1";
+  parameters["mu_c"] = "1";
+  parameters["L_c"] = "0.1";
+  parameters["q"] = "2";
+  parameters["kappa"] = "1";
+  parameters["b1"] = "1";
+  parameters["b2"] = "1";
+  parameters["b3"] = "1";
+
+  using RigidBodyMotion  = GFE::ProductManifold<RealTuple<double,dim>, Rotation<double,dim> >;
+  using ARigidBodyMotion = GFE::ProductManifold<RealTuple<adouble,dim>, Rotation<adouble,dim> >;
+
+  using LocalCoordinate = typename GridView::template Codim<0>::Geometry::LocalCoordinate;
+
+  auto bulkCosseratDensity = std::make_shared<GFE::BulkCosseratDensity<LocalCoordinate,double> >(parameters);
+  auto aBulkCosseratDensity = std::make_shared<GFE::BulkCosseratDensity<LocalCoordinate,adouble> >(parameters);
+
+  // Select which type of geometric interpolation to use
+  using DeformationFEBasis = Functions::LagrangeBasis<GridView,deformationOrder>;
+  using OrientationFEBasis = Functions::LagrangeBasis<GridView,rotationOrder>;
+
+  DeformationFEBasis deformationFEBasis(gridView);
+  OrientationFEBasis orientationFEBasis(gridView);
+
+  //////////////////////////////////////////////////////////////
+  //  Set up the two assemblers
+  //////////////////////////////////////////////////////////////
+
+  std::shared_ptr<MixedGFEAssembler<CompositeBasis,RigidBodyMotion> > assemblerADOLC;
+  std::shared_ptr<LocalGeodesicFEStiffness<CompositeBasis,RigidBodyMotion> > localIntegralStiffness;
+
+  if constexpr (interpolationType==Geodesic)
+  {
+    std::cout << "Using geodesic interpolation" << std::endl;
+
+    using LocalDeformationInterpolationRule = LocalGeodesicFEFunction<dim, typename GridView::ctype, decltype(deformationFEBasis.localView().tree().finiteElement()), RealTuple<double,dim> >;
+    using LocalOrientationInterpolationRule = LocalGeodesicFEFunction<dim, typename GridView::ctype, decltype(orientationFEBasis.localView().tree().finiteElement()), Rotation<double,dim> >;
+
+    using LocalInterpolationRule = std::tuple<LocalDeformationInterpolationRule,LocalOrientationInterpolationRule>;
+
+    using ALocalDeformationInterpolationRule = LocalGeodesicFEFunction<dim, typename GridView::ctype, decltype(deformationFEBasis.localView().tree().finiteElement()), RealTuple<adouble,dim> >;
+    using ALocalOrientationInterpolationRule = LocalGeodesicFEFunction<dim, typename GridView::ctype, decltype(orientationFEBasis.localView().tree().finiteElement()), Rotation<adouble,dim> >;
+
+    using ALocalInterpolationRule = std::tuple<ALocalDeformationInterpolationRule,ALocalOrientationInterpolationRule>;
+
+    // Assemble using the ADOL-C assembler
+    auto energy = std::make_shared<GFE::LocalIntegralEnergy<CompositeBasis, ALocalInterpolationRule,ARigidBodyMotion> >(aBulkCosseratDensity);
+
+    auto localGFEADOLCStiffness = std::make_shared<LocalGeodesicFEADOLCStiffness<CompositeBasis,RigidBodyMotion> >(energy);
+
+    assemblerADOLC = std::make_shared<MixedGFEAssembler<CompositeBasis,RigidBodyMotion> >(compositeBasis, localGFEADOLCStiffness);
+
+    // Assemble using the new assembler
+    localIntegralStiffness = std::make_shared<GFE::LocalIntegralStiffness<CompositeBasis,LocalInterpolationRule,RigidBodyMotion> >(aBulkCosseratDensity);
+  }
+  else
+  {
+    std::cout << "Using projection-based interpolation" << std::endl;
+
+    using LocalDeformationInterpolationRule = GFE::LocalProjectedFEFunction<dim, typename GridView::ctype, decltype(deformationFEBasis.localView().tree().finiteElement()), RealTuple<double,dim> >;
+    using LocalOrientationInterpolationRule = GFE::LocalProjectedFEFunction<dim, typename GridView::ctype, decltype(orientationFEBasis.localView().tree().finiteElement()), Rotation<double,dim> >;
+
+    using LocalInterpolationRule = std::tuple<LocalDeformationInterpolationRule,LocalOrientationInterpolationRule>;
+
+    using ALocalDeformationInterpolationRule = GFE::LocalProjectedFEFunction<dim, typename GridView::ctype, decltype(deformationFEBasis.localView().tree().finiteElement()), RealTuple<adouble,dim> >;
+    using ALocalOrientationInterpolationRule = GFE::LocalProjectedFEFunction<dim, typename GridView::ctype, decltype(orientationFEBasis.localView().tree().finiteElement()), Rotation<adouble,dim> >;
+
+    using ALocalInterpolationRule = std::tuple<ALocalDeformationInterpolationRule,ALocalOrientationInterpolationRule>;
+
+    // Assemble using the ADOL-C assembler
+    auto energy = std::make_shared<GFE::LocalIntegralEnergy<CompositeBasis, ALocalInterpolationRule,ARigidBodyMotion> >(aBulkCosseratDensity);
+
+    auto localGFEADOLCStiffness = std::make_shared<LocalGeodesicFEADOLCStiffness<CompositeBasis,RigidBodyMotion> >(energy);
+
+    assemblerADOLC = std::make_shared<MixedGFEAssembler<CompositeBasis,RigidBodyMotion> >(compositeBasis, localGFEADOLCStiffness);
+
+    // Assemble using the new assembler
+    localIntegralStiffness = std::make_shared<GFE::LocalIntegralStiffness<CompositeBasis,LocalInterpolationRule,RigidBodyMotion> >(aBulkCosseratDensity);
+  }
+
+  MixedGFEAssembler<CompositeBasis,RigidBodyMotion> mixedAssemblerSmart(compositeBasis,
+                                                                        localIntegralStiffness);
+
+  //////////////////////////////////////////////////////////////
+  //  Assemble
+  //////////////////////////////////////////////////////////////
+
+  CorrectionType0 gradient0;
+  CorrectionType1 gradient1;
+  MatrixType hesseMatrix;
+
+  CorrectionType0 gradientSmart0;
+  CorrectionType1 gradientSmart1;
+  MatrixType hesseMatrixSmart;
+
+  Timer assemblerTimer;
+  assemblerADOLC->assembleGradientAndHessian(x[_0], x[_1], gradient0, gradient1, hesseMatrix,true);
+  std::cout << "assemblerTimer took: " << assemblerTimer.elapsed() << std::endl;
+
+  Timer assemblerSmartTimer;
+  mixedAssemblerSmart.assembleGradientAndHessian(x[_0], x[_1], gradientSmart0, gradientSmart1, hesseMatrixSmart,true);
+  std::cout << "assemblerSmartTimer took: " << assemblerSmartTimer.elapsed() << std::endl;
+
+  //////////////////////////////////////////////////////////////
+  //  Compare the results
+  //////////////////////////////////////////////////////////////
+
+  double gradient0InfinityNorm = gradient0.infinity_norm();
+  double gradient1InfinityNorm = gradient1.infinity_norm();
+  double matrix00FrobeniusNorm = hesseMatrix[_0][_0].frobenius_norm();
+  double matrix01FrobeniusNorm = hesseMatrix[_0][_1].frobenius_norm();
+  double matrix10FrobeniusNorm = hesseMatrix[_1][_0].frobenius_norm();
+  double matrix11FrobeniusNorm = hesseMatrix[_1][_1].frobenius_norm();
+
+  double gradientSmart0InfinityNorm = gradientSmart0.infinity_norm();
+  double gradientSmart1InfinityNorm = gradientSmart1.infinity_norm();
+  double matrixSmart00FrobeniusNorm = hesseMatrixSmart[_0][_0].frobenius_norm();
+  double matrixSmart01FrobeniusNorm = hesseMatrixSmart[_0][_1].frobenius_norm();
+  double matrixSmart10FrobeniusNorm = hesseMatrixSmart[_1][_0].frobenius_norm();
+  double matrixSmart11FrobeniusNorm = hesseMatrixSmart[_1][_1].frobenius_norm();
+
+  if (std::isnan(gradient0InfinityNorm) || std::isnan(gradientSmart0InfinityNorm) || std::abs(gradient0InfinityNorm - gradientSmart0InfinityNorm)/gradient0InfinityNorm > 1e-3)
+  {
+    std::cerr << std::setprecision(9);
+    std::cerr << "Gradient (deformation part) max norm from localADOLCStiffness is " << gradient0InfinityNorm << ", from localADOLCIntegralStiffness is " << gradientSmart0InfinityNorm << " :(" << std::endl;
+    return 1;
+  }
+
+  if (std::isnan(gradient1InfinityNorm) || std::isnan(gradientSmart1InfinityNorm) || std::abs(gradient1InfinityNorm - gradientSmart1InfinityNorm)/gradient1InfinityNorm > 1e-3)
+  {
+    std::cerr << std::setprecision(9);
+    std::cerr << "Gradient (orientation part) max norm from localADOLCStiffness is " << gradient1InfinityNorm << ", from localADOLCIntegralStiffness is " << gradientSmart1InfinityNorm << " :(" << std::endl;
+    return 1;
+  }
+
+  if (std::isnan(matrix00FrobeniusNorm) || std::isnan(matrixSmart00FrobeniusNorm) || std::abs(matrix00FrobeniusNorm - matrixSmart00FrobeniusNorm)/matrix00FrobeniusNorm > 1e-3)
+  {
+    std::cerr << std::setprecision(9);
+    std::cerr << "Matrix00 norm from localADOLCStiffness is " << matrix00FrobeniusNorm << ", from localADOLCIntegralStiffness is " << matrixSmart00FrobeniusNorm << " :(" << std::endl;
+    return 1;
+  }
+
+  for (auto rowIt = hesseMatrixSmart[_0][_0].begin(); rowIt != hesseMatrixSmart[_0][_0].end(); ++rowIt)
+    for (auto colIt = rowIt->begin(); colIt != rowIt->end(); ++colIt)
+      for (size_t k = 0; k < deformationBlocksize; k++)
+        for (size_t l = 0; l < deformationBlocksize; l++)
+          if (std::abs((*colIt)[k][l]) > 1e-6 and std::abs(hesseMatrix[_0][_0][rowIt.index()][colIt.index()][k][l]) > 1e-6
+              and std::abs((*colIt)[k][l] - hesseMatrix[_0][_0][rowIt.index()][colIt.index()][k][l])/(*colIt)[k][l] > 1e-4) {
+            std::cerr << "Matrix00: Index " << " " << rowIt.index() << " " << colIt.index() << " " << k << " " << l  << " is wrong: " << hesseMatrix[_0][_0][rowIt.index()][colIt.index()][k][l] << " - " << (*colIt)[k][l] << std::endl;
+            return 1;
+          }
+
+  if (std::isnan(matrix01FrobeniusNorm) || std::isnan(matrixSmart01FrobeniusNorm) || std::abs(matrix01FrobeniusNorm - matrixSmart01FrobeniusNorm)/matrix01FrobeniusNorm > 1e-3)
+  {
+    std::cerr << std::setprecision(9);
+    std::cerr << "Matrix01 norm from localADOLCStiffness is " << matrix01FrobeniusNorm << ", from localADOLCIntegralStiffness is " << matrixSmart01FrobeniusNorm << " :(" << std::endl;
+    return 1;
+  }
+
+  for (auto rowIt = hesseMatrixSmart[_0][_1].begin(); rowIt != hesseMatrixSmart[_0][_1].end(); ++rowIt)
+    for (auto colIt = rowIt->begin(); colIt != rowIt->end(); ++colIt)
+      for (size_t k = 0; k < deformationBlocksize; k++)
+        for (size_t l = 0; l < deformationBlocksize; l++)
+          if (std::abs((*colIt)[k][l]) > 1e-6 and std::abs(hesseMatrix[_0][_1][rowIt.index()][colIt.index()][k][l]) > 1e-6
+              and std::abs((*colIt)[k][l] - hesseMatrix[_0][_1][rowIt.index()][colIt.index()][k][l])/(*colIt)[k][l] > 1e-4) {
+            std::cerr << "Matrix01 Index " << " " << rowIt.index() << " " << colIt.index() << " " << k << " " << l  << " is wrong: " << hesseMatrix[_0][_1][rowIt.index()][colIt.index()][k][l] << " - " << (*colIt)[k][l] << std::endl;
+            return 1;
+          }
+
+  if (std::isnan(matrix10FrobeniusNorm) || std::isnan(matrixSmart10FrobeniusNorm) || std::abs(matrix10FrobeniusNorm - matrixSmart10FrobeniusNorm)/matrix10FrobeniusNorm > 1e-3)
+  {
+    std::cerr << std::setprecision(9);
+    std::cerr << "Matrix10 norm from localADOLCStiffness is " << matrix10FrobeniusNorm << ", from localADOLCIntegralStiffness is " << matrixSmart10FrobeniusNorm << " :(" << std::endl;
+    return 1;
+  }
+
+  for (auto rowIt = hesseMatrixSmart[_1][_0].begin(); rowIt != hesseMatrixSmart[_1][_0].end(); ++rowIt)
+    for (auto colIt = rowIt->begin(); colIt != rowIt->end(); ++colIt)
+      for (size_t k = 0; k < deformationBlocksize; k++)
+        for (size_t l = 0; l < deformationBlocksize; l++)
+          if (std::abs((*colIt)[k][l]) > 1e-6 and std::abs(hesseMatrix[_1][_0][rowIt.index()][colIt.index()][k][l]) > 1e-6
+              and std::abs((*colIt)[k][l] - hesseMatrix[_1][_0][rowIt.index()][colIt.index()][k][l])/(*colIt)[k][l] > 1e-4) {
+            std::cerr << "Matrix10 Index " << " " << rowIt.index() << " " << colIt.index() << " " << k << " " << l  << " is wrong: " << hesseMatrix[_1][_0][rowIt.index()][colIt.index()][k][l] << " - " << (*colIt)[k][l] << std::endl;
+            return 1;
+          }
+
+  if (std::isnan(matrix11FrobeniusNorm) || std::isnan(matrixSmart00FrobeniusNorm) || std::abs(matrix11FrobeniusNorm - matrixSmart11FrobeniusNorm)/matrix11FrobeniusNorm > 1e-3)
+  {
+    std::cerr << std::setprecision(9);
+    std::cerr << "Matrix11 norm from localADOLCStiffness is " << matrix11FrobeniusNorm << ", from localADOLCIntegralStiffness is " << matrixSmart11FrobeniusNorm << " :(" << std::endl;
+    return 1;
+  }
+
+  for (auto rowIt = hesseMatrixSmart[_1][_1].begin(); rowIt != hesseMatrixSmart[_1][_1].end(); ++rowIt)
+    for (auto colIt = rowIt->begin(); colIt != rowIt->end(); ++colIt)
+      for (size_t k = 0; k < deformationBlocksize; k++)
+        for (size_t l = 0; l < deformationBlocksize; l++)
+          if (std::abs((*colIt)[k][l]) > 1e-6 and std::abs(hesseMatrix[_1][_1][rowIt.index()][colIt.index()][k][l]) > 1e-6
+              and std::abs((*colIt)[k][l] - hesseMatrix[_1][_1][rowIt.index()][colIt.index()][k][l])/(*colIt)[k][l] > 1e-4) {
+            std::cerr << "Matrix11 Index " << " " << rowIt.index() << " " << colIt.index() << " " << k << " " << l  << " is wrong: " << hesseMatrix[_1][_1][rowIt.index()][colIt.index()][k][l] << " - " << (*colIt)[k][l] << std::endl;
+            return 1;
+          }
+
+  return 0;
+}
+
+
+int main (int argc, char *argv[])
+{
+  MPIHelper::instance(argc, argv);
+
+  TestSuite test;
+
+  /////////////////////////////////////////
+  //   Create the grid
+  /////////////////////////////////////////
+
+  // Create a grid with 2 elements, one element will get refined to create different element types, the other one stays a cube
+  using GridType = UGGrid<dim>;
+  auto grid = StructuredGridFactory<GridType>::createCubeGrid({0,0,0}, {1,1,1}, {2,1,1});
+
+  //Refine once
+  for (auto&& e : elements(grid->leafGridView())) {
+    bool refineThisElement = false;
+    for (int i = 0; i < e.geometry().corners(); i++) {
+      refineThisElement = refineThisElement || (e.geometry().corner(i)[0] > 0.99);
+    }
+    grid->mark(refineThisElement ? 1 : 0, e);
+  }
+  grid->adapt();
+  grid->loadBalance();
+
+  using GridView = GridType::LeafGridView;
+  auto gridView = grid->leafGridView();
+
+  /////////////////////////////////////////////
+  //  Test different energies
+  /////////////////////////////////////////////
+
+  // TODO: Use test framework
+  testHarmonicMapIntoSphere<GridView,Geodesic>(test, gridView);
+  testHarmonicMapIntoSphere<GridView,ProjectionBased>(test, gridView);
+
+  testCosseratBulkModel<GridView,Geodesic>(test, gridView);
+  testCosseratBulkModel<GridView,ProjectionBased>(test, gridView);
+
+  return test.exit();
+}