diff --git a/dune/gfe/feassembler.hh b/dune/gfe/feassembler.hh
new file mode 100644
index 0000000000000000000000000000000000000000..700d93b7d98bd950b77a7602de5994eabe819f72
--- /dev/null
+++ b/dune/gfe/feassembler.hh
@@ -0,0 +1,202 @@
+#ifndef DUNE_GFE_FE_ASSEMBLER_HH
+#define DUNE_GFE_FE_ASSEMBLER_HH
+
+#include <dune/istl/bcrsmatrix.hh>
+#include <dune/common/fmatrix.hh>
+#include <dune/istl/matrixindexset.hh>
+#include <dune/istl/matrix.hh>
+
+//#include "localgeodesicfestiffness.hh"
+
+
+/** \brief A global FE assembler for problems involving functions that map into non-Euclidean spaces
+ */
+template <class Basis, class VectorType>
+class FEAssembler {
+
+    typedef typename Basis::GridView GridView;
+    typedef typename GridView::template Codim<0>::template Partition<Dune::Interior_Partition>::Iterator ElementIterator;
+
+    //! Dimension of the grid.
+    enum { gridDim = GridView::dimension };
+
+    //! Dimension of a tangent space
+    enum { blocksize = VectorType::value_type::dimension };
+
+    //!
+    typedef Dune::FieldMatrix<double, blocksize, blocksize> MatrixBlock;
+
+public:
+    const Basis basis_;
+
+protected:
+
+    LocalFEStiffness<GridView,
+                             typename Basis::LocalFiniteElement,
+                             VectorType>* localStiffness_;
+
+public:
+
+    /** \brief Constructor for a given grid */
+    FEAssembler(const Basis& basis,
+                LocalFEStiffness<GridView,typename Basis::LocalFiniteElement, VectorType>* localStiffness)
+        : basis_(basis),
+          localStiffness_(localStiffness)
+    {}
+
+    /** \brief Assemble the tangent stiffness matrix and the functional gradient together
+     *
+     * This is more efficient than computing them separately, because you need the gradient
+     * anyway to compute the Riemannian Hessian.
+     */
+    virtual void assembleGradientAndHessian(const VectorType& sol,
+                                            const VectorType& pointLoads,
+                                            Dune::BlockVector<Dune::FieldVector<double, blocksize> >& gradient,
+                                            Dune::BCRSMatrix<MatrixBlock>& hessian,
+                                            bool computeOccupationPattern=true) const;
+
+    /** \brief Compute the energy of a deformation state */
+    virtual double computeEnergy(const VectorType& sol,
+                                 const VectorType& pointLoads) const;
+
+    //protected:
+    void getNeighborsPerVertex(Dune::MatrixIndexSet& nb) const;
+
+}; // end class
+
+
+
+template <class Basis, class TargetSpace>
+void FEAssembler<Basis,TargetSpace>::
+getNeighborsPerVertex(Dune::MatrixIndexSet& nb) const
+{
+    int n = basis_.size();
+
+    nb.resize(n, n);
+
+    ElementIterator it    = basis_.getGridView().template begin<0,Dune::Interior_Partition>();
+    ElementIterator endit = basis_.getGridView().template end<0,Dune::Interior_Partition>  ();
+
+    for (; it!=endit; ++it) {
+
+        const typename Basis::LocalFiniteElement& lfe = basis_.getLocalFiniteElement(*it);
+
+        for (size_t i=0; i<lfe.localBasis().size(); i++) {
+
+            for (size_t j=0; j<lfe.localBasis().size(); j++) {
+
+                int iIdx = basis_.index(*it,i);
+                int jIdx = basis_.index(*it,j);
+
+                nb.add(iIdx, jIdx);
+
+            }
+
+        }
+
+    }
+
+}
+
+template <class Basis, class VectorType>
+void FEAssembler<Basis,VectorType>::
+assembleGradientAndHessian(const VectorType& sol,
+                           const VectorType& pointLoads,
+                           Dune::BlockVector<Dune::FieldVector<double, blocksize> >& gradient,
+                           Dune::BCRSMatrix<MatrixBlock>& hessian,
+                           bool computeOccupationPattern) const
+{
+    if (computeOccupationPattern) {
+
+        Dune::MatrixIndexSet neighborsPerVertex;
+        getNeighborsPerVertex(neighborsPerVertex);
+        neighborsPerVertex.exportIdx(hessian);
+
+    }
+
+    hessian = 0;
+
+    gradient.resize(sol.size());
+    gradient = 0;
+
+    ElementIterator it    = basis_.getGridView().template begin<0,Dune::Interior_Partition>();
+    ElementIterator endit = basis_.getGridView().template end<0,Dune::Interior_Partition>  ();
+
+    for( ; it != endit; ++it ) {
+
+        const int numOfBaseFct = basis_.getLocalFiniteElement(*it).localBasis().size();
+
+        // Extract local solution
+        VectorType localSolution(numOfBaseFct);
+        VectorType localPointLoads(numOfBaseFct);
+
+        for (int i=0; i<numOfBaseFct; i++) {
+            localSolution[i]   = sol[basis_.index(*it,i)];
+            localPointLoads[i] = pointLoads[basis_.index(*it,i)];
+        }
+
+        std::vector<Dune::FieldVector<double,blocksize> > localGradient(numOfBaseFct);
+
+        // setup local matrix and gradient
+        localStiffness_->assembleGradientAndHessian(*it, basis_.getLocalFiniteElement(*it), localSolution, localPointLoads, localGradient);
+
+        // Add element matrix to global stiffness matrix
+        for(int i=0; i<numOfBaseFct; i++) {
+
+            int row = basis_.index(*it,i);
+
+            for (int j=0; j<numOfBaseFct; j++ ) {
+
+                int col = basis_.index(*it,j);
+                hessian[row][col] += localStiffness_->A_[i][j];
+
+            }
+        }
+
+        // Add local gradient to global gradient
+        for (int i=0; i<numOfBaseFct; i++)
+            gradient[basis_.index(*it,i)] += localGradient[i];
+
+    }
+
+}
+
+
+template <class Basis, class VectorType>
+double FEAssembler<Basis, VectorType>::
+computeEnergy(const VectorType& sol, const VectorType& pointLoads) const
+{
+    double energy = 0;
+
+    if (sol.size()!=basis_.size())
+        DUNE_THROW(Dune::Exception, "Solution vector doesn't match the grid!");
+
+    ElementIterator it    = basis_.getGridView().template begin<0,Dune::Interior_Partition>();
+    ElementIterator endIt = basis_.getGridView().template end<0,Dune::Interior_Partition>();
+
+    // Loop over all elements
+    for (; it!=endIt; ++it) {
+
+        // Number of degrees of freedom on this element
+        size_t nDofs = basis_.getLocalFiniteElement(*it).localBasis().size();
+
+        VectorType localSolution(nDofs);
+        VectorType localPointLoads(nDofs);
+
+        for (size_t i=0; i<nDofs; i++) {
+            localSolution[i]   = sol[basis_.index(*it,i)];
+            localPointLoads[i] = pointLoads[basis_.index(*it,i)];
+        }
+
+        energy += localStiffness_->energy(*it, basis_.getLocalFiniteElement(*it), localSolution, localPointLoads);
+
+    }
+
+    return energy;
+
+}
+
+
+
+#endif
+
diff --git a/dune/gfe/localadolcstiffness.hh b/dune/gfe/localadolcstiffness.hh
new file mode 100644
index 0000000000000000000000000000000000000000..87db444fe66caef9bf7b38ce8325ff4ee4b21753
--- /dev/null
+++ b/dune/gfe/localadolcstiffness.hh
@@ -0,0 +1,206 @@
+#ifndef DUNE_GFE_LOCAL_ADOL_C_STIFFNESS_HH
+#define DUNE_GFE_LOCAL_ADOL_C_STIFFNESS_HH
+
+#include <adolc/adouble.h>            // use of active doubles
+#include <adolc/drivers/drivers.h>    // use of "Easy to Use" drivers
+// gradient(.) and hessian(.)
+#include <adolc/interfaces.h>    // use of "Easy to Use" drivers
+#include <adolc/taping.h>             // use of taping
+
+#include <dune/gfe/adolcnamespaceinjections.hh>
+
+#include <dune/common/fmatrix.hh>
+#include <dune/istl/matrix.hh>
+
+#include <dune/gfe/localfestiffness.hh>
+
+//#define ADOLC_VECTOR_MODE
+
+/** \brief Assembles energy gradient and Hessian with ADOL-C (automatic differentiation)
+ */
+template<class GridView, class LocalFiniteElement, class VectorType>
+class LocalADOLCStiffness
+    : public LocalFEStiffness<GridView,LocalFiniteElement,VectorType>
+{
+    // grid types
+    typedef typename GridView::Grid::ctype DT;
+    typedef typename VectorType::value_type::field_type RT;
+    typedef typename GridView::template Codim<0>::Entity Entity;
+
+    // some other sizes
+    enum {gridDim=GridView::dimension};
+
+    // Hack
+    typedef std::vector<Dune::FieldVector<adouble,gridDim> > AVectorType;
+
+public:
+
+    //! Dimension of a tangent space
+    enum { blocksize = VectorType::value_type::dimension };
+
+    LocalADOLCStiffness(const LocalFEStiffness<GridView, LocalFiniteElement, AVectorType>* energy)
+    : localEnergy_(energy)
+    {}
+
+    /** \brief Compute the energy at the current configuration */
+    virtual RT energy (const Entity& e,
+               const LocalFiniteElement& localFiniteElement,
+               const VectorType& localConfiguration,
+               const std::vector<Dune::FieldVector<double,3> >& localPointLoads) const;
+
+    /** \brief Assemble the local stiffness matrix at the current position
+
+    This uses the automatic differentiation toolbox ADOL_C.
+    */
+    virtual void assembleGradientAndHessian(const Entity& e,
+                         const LocalFiniteElement& localFiniteElement,
+                         const VectorType& localConfiguration,
+                         const std::vector<Dune::FieldVector<double,3> >& localPointLoads,
+                         VectorType& localGradient);
+
+    const LocalFEStiffness<GridView, LocalFiniteElement, AVectorType>* localEnergy_;
+
+};
+
+
+template <class GridView, class LocalFiniteElement, class VectorType>
+typename LocalADOLCStiffness<GridView, LocalFiniteElement, VectorType>::RT
+LocalADOLCStiffness<GridView, LocalFiniteElement, VectorType>::
+energy(const Entity& element,
+       const LocalFiniteElement& localFiniteElement,
+       const VectorType& localSolution,
+       const std::vector<Dune::FieldVector<double,3> >& localPointLoads) const
+{
+    double pureEnergy;
+
+    std::vector<Dune::FieldVector<adouble,blocksize> > localASolution(localSolution.size());
+
+    trace_on(1);
+
+    adouble energy = 0;
+
+    for (size_t i=0; i<localSolution.size(); i++)
+      for (size_t j=0; j<localSolution[i].size(); j++)
+        localASolution[i][j] <<= localSolution[i][j];
+
+    energy = localEnergy_->energy(element,localFiniteElement,localASolution,localPointLoads);
+
+    energy >>= pureEnergy;
+
+    trace_off();
+
+    return pureEnergy;
+}
+
+
+
+// ///////////////////////////////////////////////////////////
+//   Compute gradient and Hessian together
+//   To compute the Hessian we need to compute the gradient anyway, so we may
+//   as well return it.  This saves assembly time.
+// ///////////////////////////////////////////////////////////
+template <class GridType, class LocalFiniteElement, class VectorType>
+void LocalADOLCStiffness<GridType, LocalFiniteElement, VectorType>::
+assembleGradientAndHessian(const Entity& element,
+                const LocalFiniteElement& localFiniteElement,
+                const VectorType& localSolution,
+                const std::vector<Dune::FieldVector<double,3> >& localPointLoads,
+                VectorType& localGradient)
+{
+    // Tape energy computation.  We may not have to do this every time, but it's comparatively cheap.
+    energy(element, localFiniteElement, localSolution, localPointLoads);
+
+    /////////////////////////////////////////////////////////////////
+    // Compute the energy gradient
+    /////////////////////////////////////////////////////////////////
+
+    // Compute the actual gradient
+    size_t nDofs = localSolution.size();
+    size_t nDoubles = nDofs*blocksize;
+    std::vector<double> xp(nDoubles);
+    int idx=0;
+    for (size_t i=0; i<nDofs; i++)
+        for (size_t j=0; j<blocksize; j++)
+            xp[idx++] = localSolution[i][j];
+
+  // Compute gradient
+    std::vector<double> g(nDoubles);
+    gradient(1,nDoubles,xp.data(),g.data());                  // gradient evaluation
+
+    // Copy into Dune type
+    localGradient.resize(localSolution.size());
+
+    idx=0;
+    for (size_t i=0; i<nDofs; i++)
+        for (size_t j=0; j<blocksize; j++)
+            localGradient[i][j] = g[idx++];
+
+    /////////////////////////////////////////////////////////////////
+    // Compute Hessian
+    /////////////////////////////////////////////////////////////////
+
+    this->A_.setSize(nDofs,nDofs);
+
+#ifndef ADOLC_VECTOR_MODE
+    std::vector<double> v(nDoubles);
+    std::vector<double> w(nDoubles);
+
+    std::fill(v.begin(), v.end(), 0.0);
+
+    for (int i=0; i<nDofs; i++)
+      for (int ii=0; ii<blocksize; ii++)
+      {
+        // Evaluate Hessian in the direction of each vector of the orthonormal frame
+        for (size_t k=0; k<blocksize; k++)
+          v[i*blocksize + k] = (k==ii);
+
+        int rc= 3;
+        MINDEC(rc, hess_vec(1, nDoubles, xp.data(), v.data(), w.data()));
+        if (rc < 0)
+          DUNE_THROW(Dune::Exception, "ADOL-C has returned with error code " << rc << "!");
+
+        for (int j=0; j<nDoubles; j++)
+          this->A_[i][j/blocksize][ii][j%blocksize] = w[j];
+
+        // Make v the null vector again
+        std::fill(&v[i*blocksize], &v[(i+1)*blocksize], 0.0);
+      }
+#else
+    int n = nDoubles;
+    int nDirections = nDofs * blocksize;
+    double* tangent[nDoubles];
+    for(size_t i=0; i<nDoubles; i++)
+        tangent[i] = (double*)malloc(nDirections*sizeof(double));
+
+    double* rawHessian[nDoubles];
+    for(size_t i=0; i<nDoubles; i++)
+        rawHessian[i] = (double*)malloc(nDirections*sizeof(double));
+
+    for (int j=0; j<nDirections; j++)
+    {
+      for (int i=0; i<n; i++)
+        tangent[i][j] = 0.0;
+
+      for (int i=0; i<embeddedBlocksize; i++)
+        tangent[(j/blocksize)*embeddedBlocksize+i][j] = orthonormalFrame[j/blocksize][j%blocksize][i];
+    }
+
+    hess_mat(1,nDoubles,nDirections,xp.data(),tangent,rawHessian);
+
+    // Copy Hessian into Dune data type
+    for(size_t i=0; i<nDoubles; i++)
+      for (size_t j=0; j<nDirections; j++)
+        this->A_[j/blocksize][i/embeddedBlocksize][j%blocksize][i%embeddedBlocksize] = rawHessian[i][j];
+
+    for(size_t i=0; i<nDoubles; i++) {
+        free(rawHessian[i]);
+        free(tangent[i]);
+    }
+#endif
+
+//     std::cout << "ADOL-C stiffness:\n";
+//     printmatrix(std::cout, this->A_, "foo", "--");
+}
+
+#endif
+
diff --git a/dune/gfe/localfestiffness.hh b/dune/gfe/localfestiffness.hh
new file mode 100644
index 0000000000000000000000000000000000000000..c6ea01102fdc3fb94d96edc41a57c9d28cd3415b
--- /dev/null
+++ b/dune/gfe/localfestiffness.hh
@@ -0,0 +1,70 @@
+#ifndef LOCAL_FE_STIFFNESS_HH
+#define LOCAL_FE_STIFFNESS_HH
+
+#include <dune/common/fmatrix.hh>
+#include <dune/istl/matrix.hh>
+
+
+template<class GridView, class LocalFiniteElement, class VectorType>
+class LocalFEStiffness
+{
+    // grid types
+    typedef typename GridView::Grid::ctype DT;
+    typedef typename VectorType::value_type::field_type RT;
+    typedef typename GridView::template Codim<0>::Entity Entity;
+
+    // some other sizes
+    enum {gridDim=GridView::dimension};
+
+public:
+
+    //! Dimension of a tangent space
+    enum { blocksize = VectorType::value_type::dimension };
+
+    /** \brief Assemble the local stiffness matrix at the current position
+
+    This default implementation used finite-difference approximations to compute the second derivatives
+
+    The formula for the Riemannian Hessian has been taken from Absil, Mahony, Sepulchre:
+    'Optimization algorithms on matrix manifolds', page 107.  There it says that
+    \f[
+        \langle Hess f(x)[\xi], \eta \rangle
+            = \frac 12 \frac{d^2}{dt^2} \Big(f(\exp_x(t(\xi + \eta))) - f(\exp_x(t\xi)) - f(\exp_x(t\eta))\Big)\Big|_{t=0}.
+    \f]
+    We compute that using a finite difference approximation.
+
+    */
+    virtual void assembleGradientAndHessian(const Entity& e,
+                                 const LocalFiniteElement& localFiniteElement,
+                                 const VectorType& localConfiguration,
+                                 const std::vector<Dune::FieldVector<double,3> >& localPointLoads,
+                                 VectorType& localGradient);
+
+    /** \brief Compute the energy at the current configuration */
+    virtual RT energy (const Entity& e,
+                       const LocalFiniteElement& localFiniteElement,
+                       const VectorType& localConfiguration,
+                       const std::vector<Dune::FieldVector<double,3> >& localPointLoads) const = 0;
+
+    // assembled data
+    Dune::Matrix<Dune::FieldMatrix<RT,blocksize,blocksize> > A_;
+
+};
+
+
+// ///////////////////////////////////////////////////////////
+//   Compute gradient by finite-difference approximation
+// ///////////////////////////////////////////////////////////
+template <class GridType, class LocalFiniteElement, class VectorType>
+void LocalFEStiffness<GridType, LocalFiniteElement, VectorType>::
+assembleGradientAndHessian(const Entity& element,
+                const LocalFiniteElement& localFiniteElement,
+                const VectorType& localConfiguration,
+                const std::vector<Dune::FieldVector<double,3> >& localPointLoads,
+                VectorType& localGradient)
+{
+  DUNE_THROW(Dune::NotImplemented, "!");
+}
+
+#endif
+