diff --git a/dune/gfe/harmonicenergystiffness.hh b/dune/gfe/harmonicenergystiffness.hh
index 8ea1ca342043c9d274dfdcc29929d4b3f3924c47..0113dafcd1da2b8556156665957028335866b919 100644
--- a/dune/gfe/harmonicenergystiffness.hh
+++ b/dune/gfe/harmonicenergystiffness.hh
@@ -27,6 +27,11 @@ public:
     /** \brief Assemble the energy for a single element */
     RT energy (const Entity& e,
                const std::vector<TargetSpace>& localSolution) const;
+               
+    /** \brief Assemble the gradient of the energy functional on one element */
+    virtual void assembleEmbeddedGradient(const Entity& element,
+                                  const std::vector<TargetSpace>& solution,
+                                  std::vector<typename TargetSpace::EmbeddedTangentVector>& gradient) const;
 
 };
 
@@ -92,5 +97,68 @@ energy(const Entity& element,
     return 0.5 * energy;
 }
 
+
+template <class GridView, class TargetSpace>
+void HarmonicEnergyLocalStiffness<GridView, TargetSpace>::
+assembleEmbeddedGradient(const Entity& element,
+                 const std::vector<TargetSpace>& localSolution,
+                 std::vector<typename TargetSpace::EmbeddedTangentVector>& localGradient) const
+{
+    // initialize gradient
+    localGradient.resize(localSolution.size());
+    std::fill(localGradient.begin(), localGradient.end(), typename TargetSpace::TangentVector(0));
+
+    // Set up local gfe function from the  local coefficients
+    LocalGeodesicFEFunction<gridDim, double, TargetSpace> localGeodesicFEFunction(localSolution);
+
+    // I am not sure about the correct quadrature order
+    int quadOrder = 1;//gridDim;
+
+    // numerical quadrature loop
+    const Dune::QuadratureRule<double, gridDim>& quad 
+        = Dune::QuadratureRules<double, gridDim>::rule(element.type(), quadOrder);
+    
+    for (size_t pt=0; pt<quad.size(); pt++) {
+        
+        // Local position of the quadrature point
+        const Dune::FieldVector<double,gridDim>& quadPos = quad[pt].position();
+        
+        const double integrationElement = element.geometry().integrationElement(quadPos);
+
+        const Dune::FieldMatrix<double,gridDim,gridDim>& jacobianInverseTransposed = element.geometry().jacobianInverseTransposed(quadPos);
+        
+        double weight = quad[pt].weight() * integrationElement;
+
+        // The derivative of the local function defined on the reference element
+        Dune::FieldMatrix<double, TargetSpace::EmbeddedTangentVector::size, gridDim> referenceDerivative = localGeodesicFEFunction.evaluateDerivative(quadPos);
+
+        // The derivative of the function defined on the actual element
+        Dune::FieldMatrix<double, TargetSpace::EmbeddedTangentVector::size, gridDim> derivative;
+
+        for (size_t comp=0; comp<referenceDerivative.N(); comp++)
+            jacobianInverseTransposed.mv(referenceDerivative[comp], derivative[comp]);
+        
+        // loop over all the element's degrees of freedom and compute the gradient wrt it
+        for (size_t i=0; i<localSolution.size(); i++) {
+         
+            Dune::array<Dune::FieldMatrix<double,gridDim,TargetSpace::EmbeddedTangentVector::size>, TargetSpace::EmbeddedTangentVector::size> derivativeDerivative;
+            localGeodesicFEFunction.evaluateDerivativeOfGradientWRTCoefficient(quadPos, i, derivativeDerivative);
+        
+            for (int j=0; j<derivative.rows; j++) {
+                
+                for (int k=0; k<derivative.cols; k++) {
+                    
+                    localGradient[i].axpy(weight*derivative[j][k], derivativeDerivative[j][k]);
+                    
+                }
+                
+            }
+            
+            
+            
+        }
+    }
+}
+
 #endif
 
diff --git a/dune/gfe/localgeodesicfefunction.hh b/dune/gfe/localgeodesicfefunction.hh
index e989edba93fee7f8e699cfc439fac2c2f8964759..ba464af7d6a2fac3820b39b693235d2dad408dbe 100644
--- a/dune/gfe/localgeodesicfefunction.hh
+++ b/dune/gfe/localgeodesicfefunction.hh
@@ -29,6 +29,20 @@ Dune::FieldMatrix< K, m, p > operator* ( const Dune::FieldMatrix< K, m, n > &A,
     return ret;
 }
 
+//! calculates ret = A - B
+template< class K, int m, int n>
+Dune::FieldMatrix<K,m,n> operator- ( const Dune::FieldMatrix<K, m, n> &A, const Dune::FieldMatrix<K,m,n> &B)
+{
+    typedef typename Dune::FieldMatrix<K,m,n> :: size_type size_type;
+    Dune::FieldMatrix<K,m,n> ret;
+        
+    for( size_type i = 0; i < m; ++i )
+        for( size_type j = 0; j < n; ++j )
+            ret[i][j] = A[i][j] - B[i][j];
+
+    return ret;
+}
+
 /** \brief A function defined by simplicial geodesic interpolation 
            from the reference element to a Riemannian manifold.
     
@@ -58,6 +72,11 @@ public:
 
     /** \brief For debugging: Evaluate the derivative of the function using a finite-difference approximation*/
     Dune::FieldMatrix<ctype, EmbeddedTangentVector::size, dim> evaluateDerivativeFD(const Dune::FieldVector<ctype, dim>& local) const;
+    
+    /** \brief Evaluate the derivative of the gradient of the function with respect to a coefficient */
+    void evaluateDerivativeOfGradientWRTCoefficient(const Dune::FieldVector<ctype, dim>& local,
+                                                    int coefficient,
+                                                    Dune::array<Dune::FieldMatrix<double,dim,TargetSpace::EmbeddedTangentVector::size>, TargetSpace::EmbeddedTangentVector::size>& result) const;
 
 private:
 
@@ -241,4 +260,56 @@ evaluateDerivativeFD(const Dune::FieldVector<ctype, dim>& local) const
     return result;
 }
 
+template <int dim, class ctype, class TargetSpace>
+void LocalGeodesicFEFunction<dim,ctype,TargetSpace>::
+evaluateDerivativeOfGradientWRTCoefficient(const Dune::FieldVector<ctype, dim>& local,
+                                           int coefficient,
+                                           Dune::array<Dune::FieldMatrix<double,dim,TargetSpace::EmbeddedTangentVector::size>, TargetSpace::EmbeddedTangentVector::size>& result) const
+{
+    const int targetDim = EmbeddedTangentVector::size;
+    
+    // the function value at the point where we are evaluating the derivative
+    TargetSpace q = evaluate(local);
+
+    // the matrix that turns coordinates on the reference simplex into coordinates on the standard simplex
+    Dune::FieldMatrix<ctype,dim+1,dim> B = referenceToBarycentricLinearPart();
+    
+    // compute derivate of F(w,q) (the derivative of the weighted distance fctl) wrt to w
+    Dune::FieldMatrix<ctype,targetDim,dim+1> dFdw;
+    for (int i=0; i<dim+1; i++) {
+        Dune::FieldVector<ctype,targetDim> tmp = TargetSpace::derivativeOfDistanceSquaredWRTSecondArgument(coefficients_[i], q);
+        for (int j=0; j<targetDim; j++)
+            dFdw[j][i] = tmp[j];
+    }
+    
+    // the actual system matrix
+    std::vector<ctype> w = barycentricCoordinates(local);
+    AverageDistanceAssembler<TargetSpace> assembler(coefficients_, w);
+    
+    Dune::FieldMatrix<ctype,targetDim,targetDim> dFdq(0);
+    assembler.assembleHessian(q,dFdq);
+    
+    //
+    std::array<Dune::FieldMatrix<double,targetDim,targetDim>, dim+1> dcDqF;
+    
+    
+    
+    std::array<Dune::FieldMatrix<double,targetDim,dim+1>, dim+1> dcDwF;
+    for (size_t i=0; i<dcDwF.size(); i++)
+        dcDwF[i] = TargetSpace::secondDerivativeOfDistanceSquaredWRTFirstAndSecondArgument(coefficients_[i], q);
+    
+    
+    // dFDq is not invertible, if the target space is embedded into a higher-dimensional
+    // Euclidean space.  Therefore we use its pseudo inverse.  I don't think that is the
+    // best way, though.
+    Dune::FieldMatrix<ctype,targetDim,targetDim> dFdqPseudoInv = pseudoInverse(dFdq);
+    
+    // Put it all together
+    for (size_t i=0; i<result.size(); i++) {
+        
+            result[i] = dFdqPseudoInv * ( dcDqF[i] * dFdqPseudoInv * dFdw - dcDwF[i]) * B;   
+     
+    }
+    
+}
 #endif